In [None]:
import pandas as pd
import Tools
import hubspot as hubspot
from hubspot.crm.contacts import BatchInputSimplePublicObjectInputForCreate, ApiException
from hubspot.crm.contacts import BatchInputSimplePublicObjectBatchInput, ApiException

HubSpotDataPipeline=Tools.HubSpotDataPipeline
# Initialize the HubSpot data pipeline with API key
pipeline = HubSpotDataPipeline(api_key_to="HUBSPOT_API_KEY",api_key_from="HUBSPOT_API_KEY")

1. Extract Data:
   - This block calls the extract() method from pipeline to retrieve contact data, storing it in data_list.
   - The data is then converted into a DataFrame (data_frame) for easier handling.
   - The DataFrame is saved to a CSV file named "contacts_data_collect.csv" for record-keeping or further analysis.

In [2]:
# Extract data
data_list = pipeline.extract()
data_frame = pd.DataFrame(data_list)
print(data_frame)
# Save the extracted data
data_frame.to_csv("contacts_data_collect.csv", index=False)
print("Data saved to contacts_data_collect.csv")


       firstname     lastname  \
0            Zoe         Owen   
1           Zara      Rodwell   
2           Zara     Freeburn   
3         Winnie       Walter   
4            Zoe         Owen   
5           Zara     Oldfield   
6           Zara     Freeburn   
7           Zara      Rodwell   
8           Zara     Oldfield   
9         Willow       Morris   
10        Willow         Nash   
11        Willow         Nash   
12        Willow       Morris   
13        Winnie      Bristow   
14        Willow        Jones   
15        Winnie       Walter   
16        Winnie      Bristow   
17       William   Wooldridge   
18       William    Wilkinson   
19       William         Tate   
20       William   Wooldridge   
21       William    Wilkinson   
22       William      Webster   
23       William      Webster   
24       William    Wilkinson   
25        Willow        Jones   
26       William      Neville   
27       William        Nobbs   
28       William         Tate   
29       W

2. Data Animation:
   - Tools.car(0) is a function (likely defined in your code) that provides an animation or message indicating that data import has completed successfully.

In [3]:
Tools.car(0)  # Indicate data import with car animation


-------─────▄▌▐▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▌ ---------------------------------
-------───▄▄██▌█ ..Data Imported..      ---------------------------------
-------▄▄▄▌▐██▌█ ................       ---------------------------------
-------███████▌█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▌ ---------------------------------
-------▀(@)▀▀▀▀▀▀▀(@)(@)▀▀▀▀▀▀▀▀▀▀▀▀(@)▀---------------------------------



3. Transform Data:
   - Tools.car(2) starts an animation or message indicating that data transformation is in progress.
   - The transform() function from pipeline processes data_list, performing tasks like cleaning data and merging duplicates.
   - The transformed data is saved to "contacts_data_result.csv" for future use.

In [None]:
Tools.car(2)  # Indicate data transform
# Transform the data and save the transformed DataFrame
data_frame = pipeline.transform(data_list)
data_frame.to_csv("contacts_data_result.csv", index=False)
print("Data saved to contacts_data_result.csv")

In [None]:
# Load data from a CSV file into a DataFrame
# data_frame = pd.read_csv("contacts_data_result.csv")

Analysis 1: Distribution of Contacts by Country

Objective: To visualize the number of contacts from each country.
Process:
- Uses `value_counts()` on the 'country' column of the DataFrame to count the number of contacts per country.
- Generates a bar chart using `plot(kind='bar', color='skyblue')` to display the distribution of contacts by country visually.
Labels:
- The x-axis is labeled "Country," and the y-axis is labeled "Number of Contacts."
- The x-axis labels are rotated by 45 degrees for readability.

In [None]:
# Analysis 1: Distribution of contacts by country
country_counts = df['country'].value_counts()
plt.figure(figsize=(10, 6))
country_counts.plot(kind='bar', color='skyblue')
plt.title('Distribution of Contacts by Country')
plt.xlabel('Country')
plt.ylabel('Number of Contacts')
plt.xticks(rotation=45)
plt.show()

Analysis 3: Contact Creation Trend Over Time

Objective: To examine the trend of contact creation over time, aggregated by month.
Process:
- Converts the 'technical_test___create_date' column to datetime format for date manipulation.
- Groups dates by month using `dt.to_period('M')`, and calculates the number of contacts created per month with `value_counts()`.
- Generates a line plot to display the trend of contact creation over time.
Labels:
- The x-axis is labeled "Month," and the y-axis is labeled "Number of Contacts Created."
- The x-axis labels are rotated by 45 degrees for readability.

In [None]:
# Analysis 2: Top email domains by contact count
# Extract the email domain
df['email_domain'] = df['raw_email'].apply(lambda x: x.split('@')[-1] if pd.notna(x) else None)

# Count the frequency of each email domain
domain_counts = df['email_domain'].value_counts()

# Create bar chart of the most common email domains
plt.figure(figsize=(10, 6))
domain_counts.plot(kind='bar', color='salmon')
plt.title('Top Email Domains by Contact Count')
plt.xlabel('Email Domain')
plt.ylabel('Number of Contacts')
plt.xticks(rotation=45)
plt.show()

Analysis 3: Contact Creation Trend Over Time

Objective: To examine the trend of contact creation over time, aggregated by month.
Process:
- Converts the 'technical_test___create_date' column to datetime format for date manipulation.
- Groups dates by month using `dt.to_period('M')`, and calculates the number of contacts created per month with `value_counts()`.
- Generates a line plot to display the trend of contact creation over time.
Labels:
- The x-axis is labeled "Month," and the y-axis is labeled "Number of Contacts Created."
- The x-axis labels are rotated by 45 degrees for readability.

In [None]:
# Analysis 3: Contact creation trend over time
df['technical_test___create_date'] = pd.to_datetime(df['technical_test___create_date'])
date_counts = df['technical_test___create_date'].dt.to_period('M').value_counts().sort_index()
plt.figure(figsize=(12, 6))
date_counts.plot(kind='line', marker='o', color='green')
plt.title('Trend of Contact Creation Dates')
plt.xlabel('Month')
plt.ylabel('Number of Contacts Created')
plt.xticks(rotation=45)
plt.show()

4. Load Data into HubSpot:
   - This part reads the transformed data from the CSV file into a new DataFrame (data_frame).
   - The load() method from pipeline then pushes the transformed data to HubSpot.
   - Tools.car(1) displays an animation or message confirming that data export is complete.

In [None]:
# Load the transformed data into HubSpot
pipeline.load(data_frame)
Tools.car(1)  # Indicate data export with car animation