# Time Series Analysis
1. Analyze the trend of deal creation over time and its relationship with calls.
2. Study the distribution of deal closing times and the duration from creation to closure.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
# Load the deals and calls tables from their pickle files.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
deals = pd.read_pickle('deals_df.pkl')
calls = pd.read_pickle('calls_df.pkl')

# 1. Analysis of the trend of deal creation over time and its relationship with calls.

In [None]:
# Keep only deals created at or after the first recorded call. Deals created earlier have no
# calls to be compared with and would distort the picture of how deals depend on calls.
first_call_time = calls['Call Start Time'].min()
created_in_call_period = deals['Created Time'] >= first_call_time
deals = deals[created_in_call_period]

In [None]:
# Month of each call (by start time) and of each deal (by creation time)
call_month = calls['Call Start Time'].dt.to_period('M')
deal_month = deals['Created Time'].dt.to_period('M')

# Count rows per month; to_timestamp() turns the period index into datetimes for plotting
calls1 = calls.groupby(call_month).size().to_timestamp()
deals1 = deals.groupby(deal_month).size().to_timestamp()

## Chart of the trend of deal creation over time and its relationship with calls

In [None]:
def _plot_monthly_counts(ax, counts, label, **line_kwargs):
    """Draw one monthly-count line on `ax` and write each value just above its marker.

    ax          -- matplotlib Axes to draw on
    counts      -- Series of counts indexed by month (datetime index)
    label       -- legend label for the line
    line_kwargs -- extra styling passed through to `ax.plot` (color, linestyle, ...)
    """
    ax.plot(counts.index, counts, label=label, marker='o', **line_kwargs)
    for month, value in counts.items():
        ax.annotate(f' {value} ', (month, value), textcoords="offset points", xytext=(0, 5), ha='center')


# Completed calls are the calls with a duration greater than 0 seconds.
# The filtered frame is grouped by its own start times, so the grouping key no longer
# relies on implicit index alignment with the unfiltered `calls` column.
completed = calls[calls['Call Duration (in seconds)'] > 0]
calls_done = completed.groupby(completed['Call Start Time'].dt.to_period('M')).size()
calls_done.index = calls_done.index.to_timestamp()

fig, ax = plt.subplots(figsize=(15, 6))

_plot_monthly_counts(ax, deals1, 'Deals', color='blue')
_plot_monthly_counts(ax, calls1, 'Calls', color='red')
_plot_monthly_counts(ax, calls_done, 'Completed Calls', linestyle='-.', color='green')

# Title, axes, and legend
ax.set_title('Trend of Deal Creation Over Time and Its Relation to Calls')
ax.set_xlabel('Month')
ax.set_ylabel('Number')
ax.set_yticks([])  # every point is annotated with its value, so the Y ticks are hidden
ax.legend()

plt.show()

## Visualizing the change in conversion rate over time

In [None]:
# Deals created per call in each month; the two monthly series are aligned on their index.
# `deals1` must still be the monthly deal counts here (later cells rebind that name).
conversion_rate = deals1.div(calls1)

plt.figure(figsize=(12, 4))
plt.plot(conversion_rate.index, conversion_rate, marker='o')

# Write each monthly rate (two decimals) just above its marker
for month, rate in conversion_rate.items():
    plt.annotate(
        f' {rate:.2f} ',
        (month, rate),
        textcoords='offset points',
        xytext=(0, 5),
        ha='center',
    )

plt.title('Dynamics of Conversion Rate')
plt.xlabel('Month')
plt.ylabel('Conversion Rate')
plt.show()

In [None]:
# Display the monthly counts of completed calls (duration > 0) that the chart above plots.
calls_done

In [None]:
# Display the monthly counts of created deals that the chart above plots.
deals1

### Conclusions:
**The trend of deal creation over time and its connection to calls:**  
- The number of calls increased from 1935 in July 2023 to a peak of 13328 in April 2024, after which it decreased to 8495 in June 2024.  
- The number of deals also grew from 655 in July 2023 to a peak of 3081 in March 2024, then decreased to 1674 in June 2024.  
- The number of completed calls shows a similar dynamic, increasing from 4 in July 2023 to 10026 in March 2024 and decreasing to 6212 in June 2024.  
- There is a close connection between the number of calls and the number of deals: months with more calls also have more deals, and months with fewer calls have fewer deals. This suggests that call volume drives deal creation, although the chart alone shows co-movement rather than proof of cause.
  
**The dynamics of the conversion rate:**  
- At the beginning (July 2023), the conversion rate was quite high - 0.34. This means that out of 100 calls during this period, about 34 deals were made.  
- But then the rate began to decline, holding at around 0.22-0.23 from March to April 2024.  
- By May 2024, it had dropped to 0.21.  
- Thus, despite the increase in the overall number of calls and deals, the effectiveness of working with clients (the ability to convert calls into actual deals) was declining.

# 2. Analysis of the distribution of deal closing times and the duration from creation to closure.

## Chart of the Number of Deals and the Average Duration from Creation to Closing

In [None]:
# Reload the full deals table (the `deals` frame used in part 1 was trimmed to the call period).
deals = pd.read_pickle('deals_df.pkl')

# Keep only deals closed before the cutoff. The intent is to exclude the placeholder closing
# date that was used to fill in deals without a closing date.
# NOTE(review): the original note gives the placeholder as 05.05.2025, while the cutoff below
# is 2024-05-05, so real closings on or after 2024-05-05 are excluded as well. Confirm which is intended.
closed_before_cutoff = deals['Closing Date'] < '2024-05-05'
deals1 = deals[closed_before_cutoff]

In [None]:
from scipy.stats import pearsonr

# Work on an explicit copy: `deals1` is a filtered slice of `deals`, and adding a column
# to such a slice can raise pandas' SettingWithCopyWarning.
deals1 = deals1.copy()

# Duration from deal creation to closure, in whole days
deals1['duration'] = (deals1['Closing Date'] - deals1['Created Time']).dt.days

# Weekly number of closed deals
deals_count = deals1.resample('W', on='Closing Date').size()

# Weekly average duration (in days)
deals_w = deals1.resample('W', on='Closing Date')['duration'].mean()

# Correlation between the weekly number of deals and the weekly average duration.
# A week with no closed deals has count 0 and a NaN mean, which would make the
# correlation NaN, so only weeks that have an average are used.
has_mean = deals_w.notna()
correlation, p_value = pearsonr(deals_count[has_mean], deals_w[has_mean])

fig, ax1 = plt.subplots(figsize=(10, 6))

# Average duration on the first Y-axis
ax1.plot(deals_w.index, deals_w, label='Average Duration', marker='o', linestyle='-.', color='red')
ax1.set_ylabel('Average Duration (days)', color='red')
ax1.tick_params(axis='x', labelrotation=45)
ax1.set_title('Number of Deals and Average Duration from Creation to Closure')

# Annotate each weekly average on the red line (weeks without an average are skipped)
for week, value in deals_w.dropna().items():
    ax1.annotate(f' {value:.1f} ', (week, value), textcoords="offset points", xytext=(0, 5), ha='center', fontsize=10)

# Number of deals on the second Y-axis
ax2 = ax1.twinx()
ax2.plot(deals_count.index, deals_count, label='Number of Deals', marker='o', color='blue')
ax2.set_ylabel('Number of Deals', color='blue')

# Correlation summary in the top-left corner of the plot area
ax2.text(0.01, 0.95, f'Correlation Coefficient: {correlation:.2f} ', transform=ax2.transAxes, fontsize=10, verticalalignment='top')
significance_text = 'significant' if p_value < 0.05 else 'not significant'
ax2.text(0.01, 0.9, f'p-value: {p_value} ({significance_text})', transform=ax2.transAxes, fontsize=10, verticalalignment='top')

# One legend covering the lines of both axes (a plain legend() call after twinx()
# would list only the second axis)
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(handles1 + handles2, labels1 + labels2, loc='lower right')

ax2.grid(True, which='major', axis='both', linestyle='--', linewidth=0.7)
plt.tight_layout()
plt.show()

# Print the weekly deal counts (not drawn on the chart to avoid cluttering it with extra numbers)
print(deals_count)

## Heatmap of the duration of deals by month of creation and closure

In [None]:
import seaborn as sns

deals = pd.read_pickle('deals_df.pkl')

# Keep only deals closed before the cutoff. The intent is to exclude the placeholder closing
# date that was used to fill in deals without a closing date.
# NOTE(review): the original note gives the placeholder as 05.05.2025, while the cutoff below
# is 2024-05-05. Confirm which is intended.
closed = deals[deals['Closing Date'] < '2024-05-05']

# Drop rows where either date is missing, since no duration can be computed for them
closed = closed.dropna(subset=['Closing Date', 'Created Time'])

# assign() returns a new frame, so no column is written into a filtered slice of `deals`
deals1 = closed.assign(
    duration=(closed['Closing Date'] - closed['Created Time']).dt.days,  # whole days
    deal_create_m=closed['Created Time'].dt.to_period('M'),  # Month of creation
    deal_close_m=closed['Closing Date'].dt.to_period('M'),  # Month of closure
)

# Average duration for every (creation month, closure month) pair:
# rows are creation months, columns are closure months
heatmap_data = deals1.groupby(['deal_create_m', 'deal_close_m'])['duration'].mean().unstack()

# Creating a heatmap
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(
    heatmap_data,
    annot=True,
    cmap='YlOrRd',
    fmt='.1f',
    linewidths=0.5,
    cbar=True,
    cbar_kws={'label': 'Average Duration (days)'},
    ax=ax,
)

# Title and readable axis labels (the defaults would be the raw column names)
ax.set_title('Heatmap of Deal Duration by Months of Creation and Closure', fontsize=14)
ax.set_xlabel('Month of Closure')
ax.set_ylabel('Month of Creation')

plt.tight_layout()
plt.show()

## Conclusions:
**Correlation Chart of the Number of Deals and Average Duration from Creation to Closure**

- **Strong Positive Correlation**  
The chart shows a strong positive correlation (0.75) between the number of deals and the average closure duration, with both metrics increasing from July 2023 to May 2024. Periods of high workload (October 2023, March–May 2024) are accompanied by an increase in duration.

- **Efficiency with a Low Number of Deals**  
When there are few deals (17–98), they are closed quickly — within 1.2–3.2 days (July–August 2023). This shows that everything works well with a small volume.

- **Challenges with a High Number of Deals**  
As the number of deals increases to 400–577 (for example, 489 in November 2023 or 577 in April 2024), the duration increases to 14–23.7 days. This may indicate a lack of personnel or that the process is becoming more complicated.

- **Progress in 2024**  
In 2024, the number of deals peaked (577 in April), but the average duration remained moderate — around 15–23 days (for example, 15.9 days in May with 315 deals). This indicates that work has become better organized despite the high workload.

**Heatmap of Deal Duration by Months of Creation and Closure**  
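The heatmap shows that deal duration is decreasing over time: from long cycles in 2022–2023 (up to 357 days) to very short durations in 2024 (0.5–7.2 days). This indicates an improvement in processes but requires checking for anomalously long deals. Note that part of this decrease is expected by construction: deals created in recent months can only appear here with short durations, because longer-running ones had not yet closed by the cutoff date.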

**Recommendations**  
As the number of deals increases, the average duration of their closure also increases. This may indicate a lack of resources or complexities in the process. It is recommended to review task distribution and possibly increase the number of employees during peak periods.

In [None]:
# Summarize the columns, dtypes, and non-null counts of the filtered deals frame.
deals1.info()

In [None]:
import plotly.express as px

# Histogram of call start times, showing how call volume is spread over the period
fig = px.histogram(
    calls,
    x='Call Start Time',
    nbins=100,
    title='Number of Calls Over Time',
    template='plotly_white',
)

# Axis titles and a small gap between the bars
layout_options = {
    'xaxis_title': 'Call Start Time',
    'yaxis_title': 'Number of Calls',
    'bargap': 0.1,
}
fig.update_layout(**layout_options)

fig.show()
# Observation: as expected, the number of calls drops during the Christmas holidays,
# which is likely a seasonal effect.

In [None]:
# Load the advertising spend table and show summary statistics for its numeric columns.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
spend = pd.read_pickle('spend_df.pkl')
spend.describe()

In [None]:
# Make sure the date column is datetime, then total the metrics for each calendar day
spend['Date'] = pd.to_datetime(spend['Date'])
spend_daily = spend.groupby(spend['Date'].dt.date).agg({'Impressions': 'sum', 'Spend': 'sum', 'Clicks': 'sum'}).reset_index()
spend_daily['Date'] = pd.to_datetime(spend_daily['Date'])

fig, ax1 = plt.subplots(figsize=(12, 6))

# Impressions on the first (left) Y-axis
ax1.plot(spend_daily['Date'], spend_daily['Impressions'], label='Impressions', color='blue')
ax1.set_xlabel('Дата')
ax1.set_ylabel('Impressions', color='blue')
ax1.tick_params('y', colors='blue')
# Rotate the date labels on ax1: the twin axis created below hides its own x-axis,
# so rotating ticks after twinx() would have no visible effect
ax1.tick_params('x', labelrotation=45)

# Spend and Clicks share the second (right) Y-axis
ax2 = ax1.twinx()
ax2.plot(spend_daily['Date'], spend_daily['Spend'], label='Spend', color='orange')
ax2.plot(spend_daily['Date'], spend_daily['Clicks'], label='Clicks', color='green')
# The axis carries both series, so it gets one combined label and neutral tick colors
# instead of the Clicks label/color overwriting the Spend one
ax2.set_ylabel('Spend / Clicks')

ax1.set_title('Тренды по времени')

# One legend covering the lines of both axes (a plain legend() call would list only ax2)
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(handles1 + handles2, labels1 + labels2, loc='upper left')

plt.tight_layout()
plt.show()





- The data covers the time period from **July 2023 to July 2024**.
- **Activity peaks** occur in certain months, such as **2023-07** and **2023-11**, where click and impression rates are the highest.  
- In some periods (for example, **2024-01**), there is a decrease in activity, which may be related to seasonal fluctuations or a decline in advertising activity.