# In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

class ClickPredictor:
    """Decision-tree click classifier bundled with its own preprocessing.

    Everything learned during ``fit`` (the label encoders, the age fill
    value and the feature column order) is stored on the instance and reused
    unchanged by ``predict`` and ``evaluate``, so new data is prepared the
    same way the training data was.
    """

    # Placeholder category for missing values. It is always registered with
    # every encoder so that categories unseen during fitting can be mapped to
    # it instead of making ``LabelEncoder.transform`` raise.
    _MISSING_LABEL = "Missing"

    def __init__(self):
        self.model = DecisionTreeClassifier(random_state=42)
        # Column name -> fitted LabelEncoder.
        self.encoders = {}
        self.columns_to_encode = ['gender', 'device_type', 'ad_position', 'browsing_history', 'time_of_day']
        self.target_column = 'click'
        # Median of the training 'age' column; set by preprocess(fit=True).
        self.age_fill_value = None
        # Feature columns, in the order the model was trained on; set by fit().
        self.feature_columns = None

    def _require_fitted(self, value):
        """Raise a clear error when fit-time state *value* is still unset."""
        if value is None:
            raise RuntimeError("ClickPredictor must be fitted before it is used.")

    def _select_features(self, df):
        """Return the training-time feature columns of *df*, in training order."""
        self._require_fitted(self.feature_columns)
        return df[self.feature_columns]

    def preprocess(self, df, fit=False):
        """Return a cleaned, label-encoded copy of *df*.

        Drops ``full_name`` when present, fills missing ``age`` values and
        label-encodes every column listed in ``columns_to_encode``. The input
        frame is not modified.

        Args:
            df: Frame containing ``age`` and the columns to encode.
            fit: When true, learn the age fill value and the encoders from
                *df* and store them on the instance. When false, reuse the
                stored ones.

        Returns:
            The transformed copy of *df*.

        Raises:
            RuntimeError: If ``fit`` is false and no age fill value has been
                learned yet.
            KeyError: If ``fit`` is false and a column has no stored encoder.
        """
        df = df.copy()

        if 'full_name' in df.columns:
            df = df.drop(columns=['full_name'])

        if fit:
            self.age_fill_value = df['age'].median()
        else:
            self._require_fitted(self.age_fill_value)
        # Always impute with the value learned from the training data, never
        # with a statistic of the frame currently being transformed.
        df['age'] = df['age'].fillna(self.age_fill_value)

        for col in self.columns_to_encode:
            values = df[col].fillna(self._MISSING_LABEL).astype(str)
            if fit:
                # Register the placeholder even when the training data has no
                # gaps, so it is a valid target for unseen categories later.
                known = sorted(set(values) | {self._MISSING_LABEL})
                encoder = LabelEncoder()
                encoder.fit(np.array(known, dtype=object))
                self.encoders[col] = encoder
            else:
                encoder = self.encoders[col]
                # LabelEncoder.transform raises on unknown labels; fold them
                # into the placeholder category instead.
                values = values.where(values.isin(encoder.classes_), self._MISSING_LABEL)
            df[col] = encoder.transform(values)

        return df

    def fit(self, train_df):
        """Learn the preprocessing state and train the model on *train_df*.

        *train_df* must contain the target column and an ``id`` column; both
        are excluded from the features.
        """
        prepared = self.preprocess(train_df, fit=True)
        X = prepared.drop(columns=[self.target_column, 'id'])
        # Remember exactly which columns (and in which order) the model saw.
        self.feature_columns = list(X.columns)
        self.model.fit(X, prepared[self.target_column])

    def predict(self, test_df):
        """Predict the target for *test_df*.

        Returns:
            A DataFrame with the original ``id`` values and a column named
            after ``target_column`` holding the predictions.
        """
        test_ids = test_df['id']
        prepared = self.preprocess(test_df, fit=False)
        # Selecting by name ignores stray columns and restores training order.
        predictions = self.model.predict(self._select_features(prepared))
        return pd.DataFrame({'id': test_ids, self.target_column: predictions})

    def evaluate(self, df):
        """Print accuracy and a classification report for labelled *df*."""
        prepared = self.preprocess(df, fit=False)
        y = prepared[self.target_column]
        preds = self.model.predict(self._select_features(prepared))
        print("\nModel Evaluation:")
        print("Accuracy:", accuracy_score(y, preds))
        print("\nClassification Report:\n", classification_report(y, preds))

# Driver script: load the data, estimate model quality, write a submission.
# NOTE: the input paths are absolute and machine-specific.
train_df = pd.read_csv("C:\\Users\\shaik\\Downloads\\train.csv")
test_df = pd.read_csv("C:\\Users\\shaik\\Downloads\\test.csv")

# Score a model on rows it was not trained on. Evaluating on the training
# rows themselves says little about generalisation, because an unpruned
# decision tree can fit them almost perfectly.
fit_rows, holdout_rows = train_test_split(train_df, test_size=0.2, random_state=42)
validation_model = ClickPredictor()
validation_model.fit(fit_rows)
validation_model.evaluate(holdout_rows)

# The model used for the submission is trained on every labelled row.
predictor = ClickPredictor()
predictor.fit(train_df)

predictions = predictor.predict(test_df)
predictions.to_csv("submission.csv", index=False)
print("\nsubmission.csv file created.")