In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [5]:

data = pd.read_csv('bookmyshow_ads.csv')
print(data.info())



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11055 entries, 0 to 11054
Data columns (total 32 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   index                        11055 non-null  object
 1   having_IPhaving_IP_Address   11055 non-null  int64 
 2   URLURL_Length                11055 non-null  int64 
 3   Shortining_Service           11055 non-null  int64 
 4   having_At_Symbol             11055 non-null  int64 
 5   double_slash_redirecting     11055 non-null  int64 
 6   Prefix_Suffix                11055 non-null  int64 
 7   having_Sub_Domain            11055 non-null  int64 
 8   SSLfinal_State               11055 non-null  int64 
 9   Domain_registeration_length  11055 non-null  int64 
 10  Favicon                      11055 non-null  int64 
 11  port                         11055 non-null  int64 
 12  HTTPS_token                  11055 non-null  int64 
 13  Request_URL                  11

In [6]:
data = data.drop(['index'], axis=1)
print(data.isnull().sum())

# Scale numerical columns
scaler = StandardScaler()
numerical_cols = data.columns[1:-1]
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data.drop(['Result'], axis=1), data['Result'], random_state=1)
X_train.info()



having_IPhaving_IP_Address     0
URLURL_Length                  0
Shortining_Service             0
having_At_Symbol               0
double_slash_redirecting       0
Prefix_Suffix                  0
having_Sub_Domain              0
SSLfinal_State                 0
Domain_registeration_length    0
Favicon                        0
port                           0
HTTPS_token                    0
Request_URL                    0
URL_of_Anchor                  0
Links_in_tags                  0
SFH                            0
Submitting_to_email            0
Abnormal_URL                   0
Redirect                       0
on_mouseover                   0
RightClick                     0
popUpWidnow                    0
Iframe                         0
age_of_domain                  0
DNSRecord                      0
web_traffic                    0
Page_Rank                      0
Google_Index                   0
Links_pointing_to_page         0
Statistical_report             0
Result    

In [None]:
# Decision Tree Classifier
dtc = DecisionTreeClassifier()
param_grid = {'max_features': ['sqrt', 'log2', None], 'criterion': ['gini', 'entropy', 'log_loss']}
search = GridSearchCV(dtc, param_grid, cv=3)
search.fit(X_train, y_train)

best_params = search.best_params_
best_model = DecisionTreeClassifier(max_features=best_params['max_features'], criterion=best_params['criterion'])
best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", best_params)
print("Accuracy:", accuracy)

# RandomForestClassifier
rfc = RandomForestClassifier()
param_grid = {'n_estimators': [10, 50, 100, 500]}
search = GridSearchCV(rfc, param_grid, cv=3)
search.fit(X_train, y_train)

best_params = search.best_params_
best_model = RandomForestClassifier(n_estimators=best_params['n_estimators'])
best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", best_params)
print("Accuracy:", accuracy)

# AdaBoostClassifier
abc = AdaBoostClassifier()
param_grid = {'n_estimators': [10, 50, 100, 500]}
search = GridSearchCV(abc, param_grid, cv=3)
search.fit(X_train, y_train)

best_params = search.best_params_
best_model = AdaBoostClassifier(n_estimators=best_params['n_estimators'])
best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", best_params)
print("Accuracy:", accuracy)

# GradientBoostingClassifier
gbc = GradientBoostingClassifier()
param_grid = {'n_estimators': [10, 50, 100, 500], 'learning_rate': [0.1, 0.01, 0.001]}
search = GridSearchCV(gbc, param_grid, cv=3)
search.fit(X_train, y_train)

best_params = search.best_params_
best_model = GradientBoostingClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'])
best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print("Best Hyperparameters:", best_params)
print("Accuracy:", accuracy)


Best Hyperparameters: {'criterion': 'entropy', 'max_features': None}
Accuracy: 0.9659913169319826
Best Hyperparameters: {'n_estimators': 100}
Accuracy: 0.9710564399421129
Best Hyperparameters: {'n_estimators': 50}
Accuracy: 0.9345151953690304
