In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.impute import SimpleImputer


In [2]:
# Load the dataset

url = 'https://docs.google.com/spreadsheets/d/1rK1CLqpsd6JfSBLk9nRE-f0NzDc9lEXgxZ-cKjxIN_s/export?format=csv'

df = pd.read_csv(url, na_values=['9b2d5b4678781e53038e91ea5324530a03f27dc1d0e5f6c9bc9d493a23be9de0'])


In [3]:
# Data cleaning and preprocessing

df.dropna(subset=['status'], inplace=True)  # Drop rows with missing status

df.drop(df[df['status'].isin(['OPEN', 'LOST'])].index, inplace=True)  # Drop rows with non-WON/LOST status

X = df.drop(['status', 'lead_id'], axis=1)  # Input features

y = df['status']  # Output target

encoder = OrdinalEncoder()

X = encoder.fit_transform(X)  # Encode categorical features

imputer = SimpleImputer(strategy='mean')  # create an imputer object

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

X_train_imputed = imputer.fit_transform(X_train)  # impute missing values in the training data

X_test_imputed = imputer.transform(X_test)  # impute missing values in the testing data

ordinal_encoder = OrdinalEncoder()

y_train = ordinal_encoder.fit_transform(y_train.values.reshape(-1, 1)).ravel()  # Encode the output target as numerical values

y_test = ordinal_encoder.transform(y_test.values.reshape(-1, 1)).ravel()  # Encode the output target as numerical values


In [4]:
# Train a Random Forest Classifier model

try:

    model = make_pipeline(RandomForestClassifier(n_estimators=100, random_state=42))

    model.fit(X_train_imputed, y_train)

except Exception as e:

    print(f"An error occurred while fitting the model: {str(e)}")


In [5]:
# Evaluate the performance of the model

y_pred = model.predict(X_test_imputed)

print('Accuracy:', accuracy_score(y_test, y_pred))

print('Precision:', precision_score(y_test, y_pred, average='weighted'))

print('Recall:', recall_score(y_test, y_pred, average='weighted'))

print('F1 score:', f1_score(y_test, y_pred, average='weighted'))

Accuracy: 0.9336633663366337
Precision: 0.9145224502288938
Recall: 0.9336633663366337
F1 score: 0.9116237151008717


  _warn_prf(average, modifier, msg_start, len(result))
