In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [2]:
# Load the dataset
df = pd.read_csv("leadscore.csv")

In [3]:
df = df[df['status'].isin(['WON', 'LOST'])]

In [4]:
#create lead Score
df['LEAD_SCORE'] = np.random.randint(0, 100, size=(len(df),))

In [5]:
df.to_csv('lead_score_data.csv', index=False)

In [6]:
print(df.columns)


Index(['Unnamed: 0', 'Agent_id', 'status', 'lost_reason', 'budget', 'lease',
       'movein', 'source', 'source_city', 'source_country', 'utm_source',
       'utm_medium', 'des_city', 'des_country', 'room_type', 'lead_id',
       'LEAD_SCORE'],
      dtype='object')


In [7]:
#win or loss
df = df[(df.status == 'WON') | (df.status == 'LOST')]

In [8]:
#replace NAN
df = df.replace('9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0', np.nan)


In [9]:
le = LabelEncoder()
df = df.apply(le.fit_transform)

In [10]:
X = df.drop(["LEAD_SCORE", "status"], axis=1)
y = df["LEAD_SCORE"]

In [11]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [12]:
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)


RandomForestClassifier(random_state=0)

In [13]:
y_pred = clf.predict(X_test)

In [14]:
# Evaluate the performance
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted')
rec = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

In [15]:
print("Accuracy: ", acc)
print("Precision: ", prec)
print("Recall: ", rec)
print("F1-score: ", f1)

Accuracy:  0.008959412780656303
Precision:  0.009025677998007143
Recall:  0.008959412780656303
F1-score:  0.008837707989697132
