In [None]:
import pandas as pd

# Read the advertising dataset from its CSV file into a DataFrame
csv_path = '/content/advertising.csv'
df = pd.read_csv(csv_path)

# Show the leading rows as the cell output to sanity-check columns and values
df.head()

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Ad Topic Line,City,Male,Country,Timestamp,Clicked on Ad
0,68.95,35,61833.9,256.09,Cloned 5thgeneration orchestration,Wrightburgh,0,Tunisia,2016-03-27 00:53:11,0
1,80.23,31,68441.85,193.77,Monitored national standardization,West Jodi,1,Nauru,2016-04-04 01:39:02,0
2,69.47,26,59785.94,236.5,Organic bottom-line service-desk,Davidton,0,San Marino,2016-03-13 20:35:42,0
3,74.15,29,54806.18,245.89,Triple-buffered reciprocal time-frame,West Terrifurt,1,Italy,2016-01-10 02:31:19,0
4,68.37,35,73889.99,225.58,Robust logistical utilization,South Manuel,0,Iceland,2016-06-03 03:36:18,0


In [None]:
# Build the model inputs from the raw frame: expand 'Timestamp' into calendar
# features, one-hot encode the text columns, then split into features/target.
#
# NOTE: this cell rebinds `df` and drops 'Timestamp', so running it a second
# time without re-running the load cell raises a KeyError on 'Timestamp'.

# Convert 'Timestamp' to datetime and extract day / month / year / hour
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df['Day'] = df['Timestamp'].dt.day
df['Month'] = df['Timestamp'].dt.month
df['Year'] = df['Timestamp'].dt.year
df['Hour'] = df['Timestamp'].dt.hour
# The raw datetime column is removed once its components have been extracted
df = df.drop(columns=['Timestamp'])

# Encode categorical variables like 'City', 'Country', and 'Ad Topic Line'.
# drop_first=True omits one indicator column per encoded variable.
# NOTE(review): 'Ad Topic Line' and 'City' look like near-unique free text in
# the preview; confirm that one-hot encoding them is intended.
df = pd.get_dummies(df, columns=['City', 'Country', 'Ad Topic Line'], drop_first=True)

# Separate features and target variable.
# 'Unnamed: 0' holds 0, 1, 2, ... in the preview, i.e. it appears to be the row
# index that was written into the CSV. It is excluded from the features so the
# models cannot split on row position; the check keeps this cell working when
# the column is absent.
index_artifacts = [col for col in ['Unnamed: 0'] if col in df.columns]
X = df.drop(columns=['Clicked on Ad'] + index_artifacts)  # Features
y = df['Clicked on Ad']  # Target variable

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state makes the partition repeatable across runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts


In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Baseline decision tree: only the seed is set, every other hyperparameter
# is left at its default.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Label the held-out rows with the fitted tree
y_pred_dt = dt_model.predict(X_test)

# Report overall accuracy first, then the per-class precision/recall table
dt_accuracy = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Accuracy:", dt_accuracy)
print(classification_report(y_test, y_pred_dt))


Decision Tree Accuracy: 0.925
              precision    recall  f1-score   support

           0       0.91      0.92      0.92        89
           1       0.94      0.93      0.93       111

    accuracy                           0.93       200
   macro avg       0.92      0.92      0.92       200
weighted avg       0.93      0.93      0.93       200



In [None]:
from sklearn.ensemble import RandomForestClassifier

# Baseline random forest: 100 trees, seeded so the ensemble is repeatable
rf_settings = {'n_estimators': 100, 'random_state': 42}
rf_model = RandomForestClassifier(**rf_settings)
rf_model.fit(X_train, y_train)

# Label the held-out rows with the fitted forest
y_pred_rf = rf_model.predict(X_test)

# Report overall accuracy first, then the per-class precision/recall table
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", rf_accuracy)
print(classification_report(y_test, y_pred_rf))


Random Forest Accuracy: 0.935
              precision    recall  f1-score   support

           0       0.93      0.92      0.93        89
           1       0.94      0.95      0.94       111

    accuracy                           0.94       200
   macro avg       0.93      0.93      0.93       200
weighted avg       0.93      0.94      0.93       200



In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Baseline gradient boosting: 100 boosting stages, seeded for repeatability
gb_settings = {'n_estimators': 100, 'random_state': 42}
gb_model = GradientBoostingClassifier(**gb_settings)
gb_model.fit(X_train, y_train)

# Label the held-out rows with the fitted booster
y_pred_gb = gb_model.predict(X_test)

# Report overall accuracy first, then the per-class precision/recall table
gb_accuracy = accuracy_score(y_test, y_pred_gb)
print("Gradient Boosting Accuracy:", gb_accuracy)
print(classification_report(y_test, y_pred_gb))


Gradient Boosting Accuracy: 0.92
              precision    recall  f1-score   support

           0       0.90      0.92      0.91        89
           1       0.94      0.92      0.93       111

    accuracy                           0.92       200
   macro avg       0.92      0.92      0.92       200
weighted avg       0.92      0.92      0.92       200



In [None]:
# Recap of the three models on the same held-out split, printed in one place.
# Each heading is paired with the predictions it describes; the leading "\n"
# on the later headings produces the blank line between sections.
comparison_sections = (
    ("Decision Tree Performance:", y_pred_dt),
    ("\nRandom Forest Performance:", y_pred_rf),
    ("\nGradient Boosting Performance:", y_pred_gb),
)

for heading, predictions in comparison_sections:
    print(heading)
    print("Accuracy:", accuracy_score(y_test, predictions))
    print(classification_report(y_test, predictions))


Decision Tree Performance:
Accuracy: 0.925
              precision    recall  f1-score   support

           0       0.91      0.92      0.92        89
           1       0.94      0.93      0.93       111

    accuracy                           0.93       200
   macro avg       0.92      0.92      0.92       200
weighted avg       0.93      0.93      0.93       200


Random Forest Performance:
Accuracy: 0.935
              precision    recall  f1-score   support

           0       0.93      0.92      0.93        89
           1       0.94      0.95      0.94       111

    accuracy                           0.94       200
   macro avg       0.93      0.93      0.93       200
weighted avg       0.93      0.94      0.93       200


Gradient Boosting Performance:
Accuracy: 0.92
              precision    recall  f1-score   support

           0       0.90      0.92      0.91        89
           1       0.94      0.92      0.93       111

    accuracy                           0.92     

In [None]:
# Tune each model's hyperparameters with Grid Search or Randomized Search, then re-evaluate the selected configuration on the held-out test set.

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Hyperparameter search for the three models. Every search uses 5-fold
# cross-validation on the training split with accuracy as the selection metric,
# and the chosen configuration is then scored once on the held-out test split.
#
# NOTE(review): y_pred_dt / y_pred_rf / y_pred_gb are rebound here, replacing
# the baseline predictions; any earlier cell that reads these names will report
# the tuned models if it is re-run after this cell.

# --- Decision Tree: exhaustive grid, 4 * 3 * 3 = 36 candidates ---
dt_param_grid = {
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}
dt_grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=dt_param_grid,
    cv=5,
    scoring='accuracy',
)
dt_grid_search.fit(X_train, y_train)
print("Best parameters for Decision Tree:", dt_grid_search.best_params_)
y_pred_dt = dt_grid_search.predict(X_test)
dt_tuned_accuracy = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Accuracy (after tuning):", dt_tuned_accuracy)


# --- Random Forest: randomized search, 5 sampled candidates out of 27 ---
rf_param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
}
rf_random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=rf_param_grid,
    n_iter=5,
    cv=5,
    scoring='accuracy',
    random_state=42,  # seeds which candidates get sampled
)
rf_random_search.fit(X_train, y_train)
print("Best parameters for Random Forest:", rf_random_search.best_params_)
y_pred_rf = rf_random_search.predict(X_test)
rf_tuned_accuracy = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy (after tuning):", rf_tuned_accuracy)


# --- Gradient Boosting: exhaustive grid, 2 * 2 * 2 = 8 candidates ---
gb_param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5],
}
gb_grid_search = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=gb_param_grid,
    cv=5,
    scoring='accuracy',
)
gb_grid_search.fit(X_train, y_train)
print("Best parameters for Gradient Boosting:", gb_grid_search.best_params_)
y_pred_gb = gb_grid_search.predict(X_test)
gb_tuned_accuracy = accuracy_score(y_test, y_pred_gb)
print("Gradient Boosting Accuracy (after tuning):", gb_tuned_accuracy)


Best parameters for Decision Tree: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Decision Tree Accuracy (after tuning): 0.925
Best parameters for Random Forest: {'n_estimators': 300, 'min_samples_split': 10, 'max_depth': None}
Random Forest Accuracy (after tuning): 0.93
Best parameters for Gradient Boosting: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
Gradient Boosting Accuracy (after tuning): 0.92
