In [1]:

import pandas as pd
import numpy as np


from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score


import matplotlib.pyplot as plt
import seaborn as sns


import warnings
# NOTE(review): this ignores every warning category for the rest of the
# session, which includes the warnings scikit-learn emits when a metric is
# undefined. Consider narrowing the filter if a reported score looks odd.
warnings.filterwarnings("ignore")


In [2]:
import pandas as pd


# Load the three input tables. The same file names are written by the
# data-generation cell further down this notebook, so the files must already
# exist on disk when this cell runs.
users = pd.read_csv('user_demographics.csv')
watch_history = pd.read_csv('watch_history.csv')
ads = pd.read_csv('ads.csv')

# Preview the first rows of each table under its heading.
for _heading, _frame in (
    ("User Demographics:", users),
    ("\nWatch History:", watch_history),
    ("\nAd Characteristics:", ads),
):
    print(_heading)
    print(_frame.head())


User Demographics:
   user_id  age gender     location income_level
0        1   26      F  Los Angeles          Low
1        2   21      F  Los Angeles       Medium
2        3   20      M     New York         High
3        4   30      F      Chicago         High
4        5   29      F  Los Angeles          Low

Watch History:
   user_id recently_watched_categories  watch_duration watch_frequency
0        1             Horror|Thriller              73         Monthly
1        2               Drama|Romance             168           Daily
2        3               Comedy|Family             106          Weekly
3        4               Sports|Action              82           Daily
4        5               Drama|Romance              75         Monthly

Ad Characteristics:
   ad_id category target_age_group target_gender target_income_level
0    101   Action            36-50             F                 Low
1    102    Drama            26-35             M              Medium
2    103   Horror

In [3]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler


# One row per user: demographics joined to viewing behaviour on the shared key.
data = users.merge(watch_history, on='user_id')

# One-hot encode gender and keep only the first indicator column as a single
# 0/1 feature (in the recorded output, 'F' rows get 1.0 and 'M' rows 0.0).
encoder = OneHotEncoder()
encoded_gender = encoder.fit_transform(data[['gender']]).toarray()
data['gender_encoded'] = encoded_gender[:, 0]

# Standardise the numeric columns, overwriting them in `data`. The fitted
# scaler stays bound to `scaler`, so it only knows these two columns.
numeric_columns = ['age', 'watch_duration']
scaler = StandardScaler()
data[numeric_columns] = scaler.fit_transform(data[numeric_columns])

print(data.head())


   user_id       age gender     location income_level  \
0        1 -0.361812      F  Los Angeles          Low   
1        2 -1.148361      F  Los Angeles       Medium   
2        3 -1.305671      M     New York         High   
3        4  0.267427      F      Chicago         High   
4        5  0.110117      F  Los Angeles          Low   

  recently_watched_categories  watch_duration watch_frequency  gender_encoded  
0             Horror|Thriller       -0.381369         Monthly             1.0  
1               Drama|Romance        1.775183           Daily             1.0  
2               Comedy|Family        0.367749          Weekly             0.0  
3               Sports|Action       -0.177064           Daily             1.0  
4               Drama|Romance       -0.335968         Monthly             1.0  


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score


# Toy feature matrix (three numeric features per row) with binary labels.
X = [[15, 30, 1], [25, 40, 0], [30, 50, 1], [22, 35, 0]]
y = [1, 0, 1, 0]

# With four rows, test_size=0.2 holds out a single sample, so the metrics
# below are computed on one prediction only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest as well as the split so a re-run reproduces the same model.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
# Precision and recall are undefined when the held-out sample contains no
# positive label or prediction. zero_division=0 makes the reported 0.0 an
# explicit choice instead of a value that depends on a suppressed warning.
print("Precision:", precision_score(y_test, y_pred, zero_division=0))
print("Recall:", recall_score(y_test, y_pred, zero_division=0))


Accuracy: 1.0
Precision: 0.0
Recall: 0.0


In [5]:
from sklearn.model_selection import train_test_split
import numpy as np


# Seeded generator so the synthetic sample is the same on every run.
rng = np.random.default_rng(42)

# 10 rows x 3 uniform features in [0, 1) and 10 binary labels.
X = rng.random((10, 3))
y = rng.integers(0, 2, size=10)

# 80/20 split of the 10 rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")


X_train shape: (8, 3)
X_test shape: (2, 3)
y_train shape: (8,)
y_test shape: (2,)


In [6]:
def predict_ad(user_features):
    """Predict an ad for one user from a flat feature sequence.

    The first two entries are passed through the notebook-level ``scaler``;
    any remaining entries are appended unscaled. The combined row is then
    handed to the notebook-level ``model``.

    Args:
        user_features: Sequence (list, tuple or 1-D array) whose first two
            values are the ones to scale.

    Returns:
        Whatever ``model.predict`` returns for the single processed row.
    """
    # Coerce both slices to plain lists. Without this, a tuple input raises
    # TypeError on concatenation and a NumPy array input turns the `+` below
    # into element-wise addition instead of concatenation.
    features_to_scale = list(user_features[:2])
    other_features = list(user_features[2:])

    scaled_features = scaler.transform([features_to_scale])

    processed_data = list(scaled_features[0]) + other_features

    prediction = model.predict([processed_data])
    return prediction


In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier


# Toy training set: four rows of three numeric features with binary labels.
X_train = [[15, 30, 1], [25, 40, 0], [30, 50, 1], [22, 35, 0]]
y_train = [1, 0, 1, 0]

# Standardise all three feature columns; the fitted scaler is reused at
# prediction time.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Fixed seed, in line with the seeded forests used elsewhere in this
# notebook, so the prediction printed below is reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)


def predict_ad(user_input):
    """Return the model's prediction for a single raw feature row.

    ``user_input`` is wrapped in a one-row batch, scaled with the
    notebook-level ``scaler``, and passed to the notebook-level ``model``.
    The first (and only) element of the prediction is returned.
    """
    return model.predict(scaler.transform([user_input]))[0]




In [8]:

# Sanity check: this input equals the first training row ([15, 30, 1],
# label 1) from the preceding cell, so it exercises the pipeline on a row the
# model was fitted on rather than measuring generalisation.
user_input = [15, 30, 1]  
ad_prediction = predict_ad(user_input)
print("Predicted Ad:", ad_prediction)


Predicted Ad: 1


In [9]:
import pandas as pd
import random


# Fixed seed so the generated tables (and the CSV files written below) are
# identical on every run.
random.seed(42)

# Ten synthetic users with random demographics.
user_data = {
    "user_id": range(1, 11),
    "age": [random.randint(18, 50) for _ in range(10)],
    "gender": [random.choice(["M", "F"]) for _ in range(10)],
    "location": [random.choice(["New York", "Los Angeles", "Chicago", "Houston"]) for _ in range(10)],
    "income_level": [random.choice(["Low", "Medium", "High"]) for _ in range(10)]
}
users = pd.DataFrame(user_data)

# One viewing-behaviour row per user, keyed by the same user_id values.
watch_data = {
    "user_id": range(1, 11),
    "recently_watched_categories": [random.choice(["Sports|Action", "Drama|Romance", "Comedy|Family", "Horror|Thriller"]) for _ in range(10)],
    "watch_duration": [random.randint(30, 180) for _ in range(10)],  # Duration in minutes
    "watch_frequency": [random.choice(["Daily", "Weekly", "Monthly"]) for _ in range(10)]
}
watch_history = pd.DataFrame(watch_data)

# Ten synthetic ads (ids 101-110) with random targeting attributes.
ad_data = {
    "ad_id": range(101, 111),
    "category": [random.choice(["Sports", "Drama", "Comedy", "Horror", "Action"]) for _ in range(10)],
    "target_age_group": [random.choice(["18-25", "26-35", "36-50"]) for _ in range(10)],
    "target_gender": [random.choice(["M", "F"]) for _ in range(10)],
    "target_income_level": [random.choice(["Low", "Medium", "High"]) for _ in range(10)]
}
ads = pd.DataFrame(ad_data)

# Persist the tables; these are the file names the loading cell reads.
users.to_csv("user_demographics.csv", index=False)
watch_history.to_csv("watch_history.csv", index=False)
ads.to_csv("ads.csv", index=False)

print("Data generated and saved to CSV files!")


Data generated and saved to CSV files!


In [10]:

# The gender column holds the short codes 'M' / 'F' (see the data-generation
# cell and the loaded preview), so those must be the mapping keys. With only
# 'Male' / 'Female' as keys, every row maps to NaN. The long forms are kept so
# data using them still encodes.
users['gender_encoded'] = users['gender'].map({'M': 0, 'F': 1, 'Male': 0, 'Female': 1})


In [11]:

import pandas as pd

# Each model feature and the table it is read from. The columns are combined
# by row index, so `users` and `watch_history` are expected to list users in
# the same order.
feature_sources = {
    'age': users,
    'gender_encoded': users,
    'watch_duration': watch_history,
}
training_features = pd.DataFrame(
    {column: source[column] for column, source in feature_sources.items()}
)

# Labels are the ad ids, paired with the feature rows by position.
training_labels = ads['ad_id']


In [12]:

from sklearn.ensemble import RandomForestClassifier

# Fit a seeded 100-tree random forest directly on the unscaled feature frame.
# Later cells rebind `model`, so this fit is superseded further down.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(training_features, training_labels)


In [13]:
from sklearn.preprocessing import StandardScaler

# Fit a fresh scaler on the training features. `scaled_features` is not used
# by the model fitted in the previous cell (that fit used the unscaled frame);
# a later cell recomputes it and trains on the scaled values.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(training_features)


In [14]:

# (Re)build the gender encoding when it is missing or unusable. The data
# stores gender as 'M' / 'F', so those are the required mapping keys. A column
# produced earlier with only 'Male' / 'Female' keys is entirely NaN, and
# checking for the column's presence alone would keep it.
if 'gender_encoded' not in users.columns or users['gender_encoded'].isna().all():
    users['gender_encoded'] = users['gender'].map({'M': 0, 'F': 1, 'Male': 0, 'Female': 1})


# Feature frame; columns are combined by row index across the two tables.
training_features = pd.DataFrame({
    'age': users['age'],
    'gender_encoded': users['gender_encoded'],
    'watch_duration': watch_history['watch_duration'],
})


# Labels are the ad ids, paired with the feature rows by position.
training_labels = ads['ad_id']
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_features = scaler.fit_transform(training_features)

from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(scaled_features, training_labels)

test_user = [30, 1, 90]  # Example input: age=30, gender_encoded=1 (Female), watch_duration=90
# The scaler was fitted on a named DataFrame, so give it a one-row frame with
# the same columns to keep the feature order explicit.
test_user_frame = pd.DataFrame([test_user], columns=training_features.columns)
scaled_test_user = scaler.transform(test_user_frame)
predicted_ad = model.predict(scaled_test_user)
print("Predicted Ad ID:", predicted_ad)


Predicted Ad ID: [103]


In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import joblib


# Three training rows, each with its own label.
X_train = [[25, 1, 120], [30, 0, 150], [22, 1, 100]]
y_train = [1, 2, 3]
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Fixed seed so the persisted model, and the predictions made with it, are
# reproducible across runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# Persist both artefacts; the scaler must be saved with the model because
# inputs have to be scaled the same way at prediction time.
joblib.dump(model, 'model.pkl')
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [16]:

# Same first and last values as the first training row ([25, 1, 120]) in the
# preceding cell, with the middle feature flipped from 1 to 0.
test_user = [25, 0, 120]

# Scale with the fitted scaler first: the model was trained on scaled rows.
scaled_test_user = scaler.transform([test_user])
predicted_ad = model.predict(scaled_test_user)


print("Predicted Ad ID:", predicted_ad)


Predicted Ad ID: [1]


In [2]:
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import numpy as np

import os

# Example training data
X = np.array([
    [20, 1, 50],
    [25, 0, 100],
    [30, 1, 200]
])  # Features: [age, gender, watchDuration]
y = np.array([0, 1, 1])  # Labels: [Ad type]

# Train the scaler and model (forest seeded so the saved model is reproducible)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
model = RandomForestClassifier(random_state=42)
model.fit(X_scaled, y)

# Save the scaler and model. joblib.dump does not create missing parent
# directories, so make sure the target folder exists first.
os.makedirs('data', exist_ok=True)
joblib.dump(scaler, 'data/scaler.pkl')
joblib.dump(model, 'data/model.pkl')
print("Model and scaler saved successfully!")


Model and scaler saved successfully!


In [18]:
# Two hand-picked inputs in the order [age, gender, watchDuration].
# NOTE(review): the recorded output shows label 2 for test_user_2, which the
# 0/1-labelled model in the preceding cell cannot produce. The output was
# presumably captured with a model from an earlier cell; re-run to confirm.
test_user_1 = [40, 1, 30]
test_user_2 = [18, 0, 200]

# Scale each row with the fitted scaler.
scaled_test_user_1 = scaler.transform([test_user_1])
scaled_test_user_2 = scaler.transform([test_user_2])

# Predict on the scaled rows.
predicted_ad_1 = model.predict(scaled_test_user_1)
predicted_ad_2 = model.predict(scaled_test_user_2)

print("Predicted Ad ID for test_user_1:", predicted_ad_1)
print("Predicted Ad ID for test_user_2:", predicted_ad_2)


Predicted Ad ID for test_user_1: [1]
Predicted Ad ID for test_user_2: [2]
