<a href="https://colab.research.google.com/github/SAGARGAUD01/AI-Driven-Media-Investment-Plan-/blob/main/Dual_Delight_AIMediaInvest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# %pip install pandas==1.3.3
# %pip install scikit-learn==0.24.2
# %pip install matplotlib==3.4.3
# %pip install seaborn==0.11.2

#### Importing the required libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder

#### Reading the data from files in both the folders and combining them

In [3]:
import pandas as pd

# Show every column when a frame is printed or displayed.
pd.set_option('display.max_columns', None)

# Both dataset folders contain the same four CSV file names.
folder1 = 'Dataset 1/'
folder2 = 'Dataset 2/'

google_ads_file = 'googleads-performance.csv'
meta_ads_file = 'metaads-performance.csv'
microsoft_ads_file = 'microsoftads-performance.csv'
website_landings_file = 'website-landings.csv'

file_names = (google_ads_file, meta_ads_file, microsoft_ads_file, website_landings_file)

# Read the four files of the first folder, then the four files of the second.
google_ads_df1, meta_ads_df1, microsoft_ads_df1, website_landings_df1 = [
    pd.read_csv(folder1 + file_name) for file_name in file_names
]
google_ads_df2, meta_ads_df2, microsoft_ads_df2, website_landings_df2 = [
    pd.read_csv(folder2 + file_name) for file_name in file_names
]

# Stack the two copies of each source into a single frame with a fresh 0..n-1 index.
google_ads_df, meta_ads_df, microsoft_ads_df, website_landings_df = [
    pd.concat(frame_pair, ignore_index=True)
    for frame_pair in (
        [google_ads_df1, google_ads_df2],
        [meta_ads_df1, meta_ads_df2],
        [microsoft_ads_df1, microsoft_ads_df2],
        [website_landings_df1, website_landings_df2],
    )
]

# Preview the first rows of every combined frame.
for heading, frame in (
    ("Google Ads DataFrame:", google_ads_df),
    ("\nMeta Ads DataFrame:", meta_ads_df),
    ("\nMicrosoft Ads DataFrame:", microsoft_ads_df),
    ("\nWebsite Landings DataFrame:", website_landings_df),
):
    print(heading)
    print(frame.head())


FileNotFoundError: [Errno 2] No such file or directory: 'Dataset 1/googleads-performance.csv'

In [None]:
def preprocess_ads_data(df, platform):
    """Return a cleaned copy of one platform's ad-performance frame.

    The 'Date' column is parsed with the ``%Y-%m-%d`` format, a constant
    'Platform' column is added, and every missing value is replaced with 0.
    The work is done on a copy, so the frame passed in is left untouched.

    Args:
        df: Ads DataFrame containing a 'Date' column.
        platform: Label written to the 'Platform' column of every row.

    Returns:
        A new DataFrame with the parsed 'Date', the added 'Platform' column
        and missing values filled with 0.

    Raises:
        KeyError: If ``df`` has no 'Date' column.
    """
    cleaned = df.copy()
    cleaned['Date'] = pd.to_datetime(cleaned['Date'], format='%Y-%m-%d')
    cleaned['Platform'] = platform
    # fillna without inplace returns a new frame, keeping the function free of side effects.
    return cleaned.fillna(0)

In [None]:
# Clean each platform frame and tag its rows with the platform name.
google_ads_df, meta_ads_df, microsoft_ads_df = [
    preprocess_ads_data(platform_frame, platform_name)
    for platform_frame, platform_name in (
        (google_ads_df, 'Google Ads'),
        (meta_ads_df, 'Meta Ads'),
        (microsoft_ads_df, 'Microsoft Ads'),
    )
]

In [None]:
# Parse the landing timestamps and cast the conversion flag to int.
landing_timestamps = pd.to_datetime(
    website_landings_df['Website Landing Time'],
    format='%Y-%m-%d %H:%M:%S',
)
website_landings_df['Website Landing Time'] = landing_timestamps

converted_flags = website_landings_df['Is Converted'].astype(int)
website_landings_df['Is Converted'] = converted_flags

In [None]:
# Stack the three platform frames into one table. ignore_index=True gives every
# row a unique label instead of repeating each platform's own 0..n-1 index, and
# matches the way the per-folder frames were combined when the data was loaded.
ads_data = pd.concat([google_ads_df, meta_ads_df, microsoft_ads_df], axis=0, ignore_index=True)

#### Overview of the data

In [None]:
# Show the last rows of the combined ads table.
ads_data.tail()

In [None]:
# Summary statistics for the combined ads table.
ads_data.describe()

### EDA: Analyze Customer Journey and Ad Performance

#### Plotting Impressions vs. Conversions for each platform

In [None]:
# One point per ads row, coloured by platform.
fig, ax = plt.subplots(figsize=(14, 6))
sns.scatterplot(data=ads_data, x='Impressions', y='Conversions', hue='Platform', ax=ax)
ax.set_title('Impressions vs. Conversions by Platform')
plt.show()

#### Plotting Cost vs. Revenue for each platform

In [None]:
# One point per ads row, coloured by platform.
fig, ax = plt.subplots(figsize=(14, 6))
sns.scatterplot(data=ads_data, x='Cost', y='Revenue', hue='Platform', ax=ax)
ax.set_title('Cost vs. Revenue by Platform')
plt.show()

#### Conversion Rate by Source

In [None]:
# Mean of the integer conversion flag per source, highest first.
converted_by_source = website_landings_df.groupby('Source')['Is Converted']
conversion_rate_by_source = converted_by_source.mean().sort_values(ascending=False)

ax = conversion_rate_by_source.plot.bar(figsize=(10, 5), title='Conversion Rate by Source')
ax.set_ylabel('Conversion Rate')
plt.show()

#### Conversion Rate by Channel

In [None]:
# Mean of the integer conversion flag per channel, highest first.
converted_by_channel = website_landings_df.groupby('Channel')['Is Converted']
conversion_rate_by_channel = converted_by_channel.mean().sort_values(ascending=False)

ax = conversion_rate_by_channel.plot.bar(figsize=(10, 5), title='Conversion Rate by Channel')
ax.set_ylabel('Conversion Rate')
plt.show()

##### One-Hot Encode categorical columns - just for HeatMap

In [None]:
# One-hot encode the two categorical columns so they can take part in the correlation heatmap.
df = pd.get_dummies(ads_data, columns=['Campaign type', 'Platform'])

In [None]:
# Correlate only numeric/boolean columns. The encoded frame still carries the
# datetime 'Date' column, and since pandas 2.0 DataFrame.corr() no longer drops
# non-numeric columns by default, so they are filtered out explicitly here.
numeric_df = df.select_dtypes(include=['number', 'bool'])

plt.figure(figsize=(10, 8))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix for Ad Performance Metrics')
plt.show()

#### Feature Engineering: Derived Ratio Metrics (CTR, CPC, CPA)

In [None]:
# Ratio metrics derived from the raw counts and cost.
ads_data['CTR'] = ads_data['Clicks'].div(ads_data['Impressions'])  # Click-Through Rate
ads_data['CPC'] = ads_data['Cost'].div(ads_data['Clicks'])  # Cost Per Click
ads_data['CPA'] = ads_data['Cost'].div(ads_data['Conversions'])  # Cost Per Acquisition

In [None]:
# Turn +/-inf (e.g. from a zero denominator in the ratio columns) into NaN first...
ads_data.replace([np.inf, -np.inf], np.nan, inplace=True)
# ...then replace every NaN in the frame with 0.
ads_data.fillna(0, inplace=True)

#### Summarize performance by platform and campaign

In [None]:
# Total each metric per (Platform, Campaign type) pair.
summed_metrics = ['Impressions', 'Clicks', 'Cost', 'Conversions', 'Revenue']
performance_summary = (
    ads_data
    .groupby(['Platform', 'Campaign type'])
    .agg(dict.fromkeys(summed_metrics, 'sum'))
    .reset_index()
)

In [None]:
# Print the per-platform / per-campaign-type totals.
print(performance_summary)

#### Standardize Data

In [None]:
# Standardize the summed metrics before clustering.
cluster_features = ['Impressions', 'Clicks', 'Cost', 'Conversions', 'Revenue']
scaler = StandardScaler()
X = scaler.fit_transform(performance_summary[cluster_features])

#### Apply KMeans Clustering

In [None]:
# Fit a 3-cluster KMeans on the standardized metrics and store each row's label.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cluster_labels = kmeans.fit_predict(X)
performance_summary['Cluster'] = cluster_labels

#### Visualize Clusters

In [None]:
# Cluster ids are nominal labels, so their mean/std (what describe() reports)
# carries no meaning; show how many summary rows fall in each cluster instead.
performance_summary['Cluster'].value_counts().sort_index()

In [None]:
# Scatter of the summary rows, with colour and marker both keyed on the cluster label.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=performance_summary,
    x='Impressions',
    y='Revenue',
    hue='Cluster',
    style='Cluster',
    palette='viridis',
    s=100,
    legend='full',
    ax=ax,
)

ax.set_title('Clusters based on Impressions and Revenue')
ax.set_xlabel('Impressions')
ax.set_ylabel('Revenue')
ax.legend(title='Cluster')
ax.grid(True)
plt.show()

In [None]:
# Predictors and target for the conversion model.
predictor_columns = ['Impressions', 'Clicks', 'Cost']
X = ads_data[predictor_columns]
y = ads_data['Conversions']

In [None]:
# Fit on all rows and store the in-sample predictions next to the observed data.
model = LinearRegression().fit(X, y)
predicted_conversions = model.predict(X)
ads_data['Predicted Conversions'] = predicted_conversions

#### Budget Reallocation Logic

In [None]:
# Overall budget to distribute, and the floor used later (10% of the total).
total_budget = 200_000
min_budget_per_channel = total_budget * 0.1

In [None]:
# Calculate budget allocation
def allocate_budget(df, total_budget):
    """Split ``total_budget`` across rows in proportion to predicted conversions.

    Negative predictions (a linear model can produce them) are treated as zero
    weight so no row receives a negative budget. If the weights sum to zero the
    budget is split equally instead of dividing by zero. The input frame is not
    modified.

    Args:
        df: DataFrame with a numeric 'Predicted Conversions' column.
        total_budget: Amount to distribute over the rows of ``df``.

    Returns:
        A copy of ``df`` with an added 'Budget Allocation' column.

    Raises:
        KeyError: If ``df`` has no 'Predicted Conversions' column.
    """
    allocated = df.copy()
    weights = allocated['Predicted Conversions'].clip(lower=0)
    weight_total = weights.sum()

    if weight_total > 0:
        allocated['Budget Allocation'] = weights / weight_total * total_budget
    elif len(allocated) > 0:
        # No usable signal: fall back to an equal split rather than producing NaN.
        allocated['Budget Allocation'] = total_budget / len(allocated)
    else:
        # Empty input: add an empty float column so the schema stays consistent.
        allocated['Budget Allocation'] = weights.astype(float)
    return allocated

In [None]:
# Allocate at the (Platform, Campaign type) level rather than per raw data row,
# so the minimum-budget floor applied afterwards acts once per channel instead
# of once per row (which would push the summed budget far above total_budget).
channel_predictions = (
    ads_data
    .groupby(['Platform', 'Campaign type'], as_index=False)['Predicted Conversions']
    .sum()
)
budget_allocation = allocate_budget(channel_predictions, total_budget)

In [None]:
# Ensure minimum budget allocation constraint
def enforce_min_budget(df, min_budget):
    """Raise every 'Budget Allocation' value below ``min_budget`` up to ``min_budget``.

    The frame is updated in place and the same object is returned. Values that
    are already at or above the floor are left unchanged, and nothing is
    rescaled afterwards.

    Args:
        df: DataFrame with a 'Budget Allocation' column.
        min_budget: Floor applied to each row's allocation.

    Returns:
        ``df`` itself, after the update.
    """
    below_floor = df['Budget Allocation'].lt(min_budget)
    df.loc[below_floor, 'Budget Allocation'] = min_budget
    return df

In [None]:
# Raise any allocation below min_budget_per_channel up to that floor.
# NOTE(review): nothing is rescaled afterwards, so the allocations can sum to more than total_budget.
budget_allocation = enforce_min_budget(budget_allocation, min_budget_per_channel)

#### Output: Cumulative Budgets and Estimated Conversions

In [None]:
# Total budget and predicted conversions per (Platform, Campaign type) pair.
summary_columns = ['Budget Allocation', 'Predicted Conversions']
budget_summary = (
    budget_allocation
    .groupby(['Platform', 'Campaign type'])
    .agg(dict.fromkeys(summary_columns, 'sum'))
    .reset_index()
)

print(budget_summary)

#### Visualization of Budget Allocation

In [None]:
# Grouped bars: one group per platform, one bar per campaign type.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=budget_summary, x='Platform', y='Budget Allocation', hue='Campaign type', ax=ax)
ax.set_title('Budget Allocation by Platform and Campaign Type')
ax.set_xlabel('Platform')
ax.set_ylabel('Budget Allocation ($)')
plt.show()

#### Preparing data for modelling

In [None]:
# Predictors (raw counts plus two ratio metrics) and target for the evaluated models.
model_features = ['Impressions', 'Clicks', 'Cost', 'CTR', 'CPC']
X = ads_data[model_features]
y = ads_data['Conversions']

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Hold out 20% of the rows for evaluation; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Rebind cleaned copies instead of using inplace=True: X_train / X_test are
# derived from X, and in-place edits on derived frames can raise
# SettingWithCopyWarning. +/-inf becomes NaN first, then every NaN becomes 0.
X_train = X_train.replace([np.inf, -np.inf], np.nan).fillna(0)
X_test = X_test.replace([np.inf, -np.inf], np.nan).fillna(0)

In [None]:
# Fit on the training split and score on the held-out split.
lr_model = LinearRegression().fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)

lr_rmse = np.sqrt(mean_squared_error(y_test, lr_predictions))
lr_r2 = r2_score(y_test, lr_predictions)
print("Linear Regression - RMSE:", lr_rmse)
print("Linear Regression - R2 Score:", lr_r2)

In [None]:
# Fit on the training split and score on the held-out split.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)

rf_rmse = np.sqrt(mean_squared_error(y_test, rf_predictions))
rf_r2 = r2_score(y_test, rf_predictions)
print("Random Forest - RMSE:", rf_rmse)
print("Random Forest - R2 Score:", rf_r2)

#### Budget Allocation

In [None]:
# Overall budget to distribute, and the floor used later (10% of the total).
total_budget = 200_000
min_budget_per_channel = total_budget * 0.1

In [None]:
def allocate_budget(df, total_budget):
    """Split ``total_budget`` across rows in proportion to predicted conversions.

    Negative predictions (a linear model can produce them) are treated as zero
    weight so no row receives a negative budget. If the weights sum to zero the
    budget is split equally instead of dividing by zero. The input frame is not
    modified.

    Args:
        df: DataFrame with a numeric 'Predicted Conversions' column.
        total_budget: Amount to distribute over the rows of ``df``.

    Returns:
        A copy of ``df`` with an added 'Budget Allocation' column.

    Raises:
        KeyError: If ``df`` has no 'Predicted Conversions' column.
    """
    allocated = df.copy()
    weights = allocated['Predicted Conversions'].clip(lower=0)
    weight_total = weights.sum()

    if weight_total > 0:
        allocated['Budget Allocation'] = weights / weight_total * total_budget
    elif len(allocated) > 0:
        # No usable signal: fall back to an equal split rather than producing NaN.
        allocated['Budget Allocation'] = total_budget / len(allocated)
    else:
        # Empty input: add an empty float column so the schema stays consistent.
        allocated['Budget Allocation'] = weights.astype(float)
    return allocated

In [None]:
# Allocate at the (Platform, Campaign type) level rather than per raw data row,
# so the minimum-budget floor applied afterwards acts once per channel instead
# of once per row (which would push the summed budget far above total_budget).
channel_predictions = (
    ads_data
    .groupby(['Platform', 'Campaign type'], as_index=False)['Predicted Conversions']
    .sum()
)
budget_allocation = allocate_budget(channel_predictions, total_budget)

In [None]:
def enforce_min_budget(df, min_budget):
    """Raise every 'Budget Allocation' value below ``min_budget`` up to ``min_budget``.

    The frame is updated in place and the same object is returned. Values that
    are already at or above the floor are left unchanged, and nothing is
    rescaled afterwards.

    Args:
        df: DataFrame with a 'Budget Allocation' column.
        min_budget: Floor applied to each row's allocation.

    Returns:
        ``df`` itself, after the update.
    """
    below_floor = df['Budget Allocation'].lt(min_budget)
    df.loc[below_floor, 'Budget Allocation'] = min_budget
    return df

In [None]:

# Raise any allocation below min_budget_per_channel up to that floor.
# NOTE(review): nothing is rescaled afterwards, so the allocations can sum to more than total_budget.
budget_allocation = enforce_min_budget(budget_allocation, min_budget_per_channel)

#### Visualization of Budget Allocation

In [None]:
# Total budget and predicted conversions per (Platform, Campaign type) pair.
summary_columns = ['Budget Allocation', 'Predicted Conversions']
budget_summary = (
    budget_allocation
    .groupby(['Platform', 'Campaign type'])
    .agg(dict.fromkeys(summary_columns, 'sum'))
    .reset_index()
)

print(budget_summary)

In [None]:
# Grouped bars: one group per platform, one bar per campaign type.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=budget_summary, x='Platform', y='Budget Allocation', hue='Campaign type', ax=ax)
ax.set_title('Budget Allocation by Platform and Campaign Type')
ax.set_xlabel('Platform')
ax.set_ylabel('Budget Allocation ($)')
plt.show()

#### Budget Allocation

##### Budget Allocation by user input