In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

train = pd.read_csv("/content/drive/MyDrive/dataset (1)/train.csv")
test = pd.read_csv("/content/drive/MyDrive/dataset (1)/test.csv")

drop_cols = ["company_id", "company_name", "last_funding_date"]
train = train.drop(columns=drop_cols)
test_ids = test["company_id"]
test = test.drop(columns=drop_cols)

label_encoders = {}
for col in ["industry", "hiring_roles"]:
    le = LabelEncoder()
    train[col] = le.fit_transform(train[col])
    test[col] = le.transform(test[col])
    label_encoders[col] = le

X = train.drop(columns=["is_hot_lead"])
y = train["is_hot_lead"]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

for df in [X_train, X_val, test]:
    for col in df.columns:
        max_finite_value = df[df[col] != np.inf][col].max()
        df[col] = df[col].replace(np.inf, max_finite_value)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)
conf_matrix = confusion_matrix(y_val, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

if "is_hot_lead" in test.columns:
    test = test.drop(columns=["is_hot_lead"])

test_predictions = model.predict(test)

submission = pd.DataFrame({"company_id": test_ids, "is_hot_lead": test_predictions})
submission.to_csv("submission.csv", index=False)

print("Submission file saved as submission.csv in runtime memory")


Accuracy: 0.9543
Precision: 0.8548
Recall: 0.8479
F1-Score: 0.8513
Confusion Matrix:
[[3293   89]
 [  94  524]]
Submission file saved as submission.csv in runtime memory


In [4]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

train = pd.read_csv("/content/drive/MyDrive/dataset (1)/train.csv")
test = pd.read_csv("/content/drive/MyDrive/dataset (1)/test.csv")

drop_cols = ["company_id", "company_name", "last_funding_date"]
train = train.drop(columns=drop_cols)
test_ids = test["company_id"]
test = test.drop(columns=drop_cols)

label_encoders = {}
for col in ["industry", "hiring_roles"]:
    le = LabelEncoder()
    train[col] = le.fit_transform(train[col])
    test[col] = le.transform(test[col])
    label_encoders[col] = le

X = train.drop(columns=["is_hot_lead"])
y = train["is_hot_lead"]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

for df in [X_train, X_val, test]:
    for col in df.columns:
        max_finite_value = df[df[col] != np.inf][col].max()
        df[col] = df[col].replace(np.inf, max_finite_value)

model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=6, random_state=42, use_label_encoder=False, eval_metric="logloss")
model.fit(X_train, y_train)

y_pred = model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)
conf_matrix = confusion_matrix(y_val, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

if "is_hot_lead" in test.columns:
    test = test.drop(columns=["is_hot_lead"])

test_predictions = model.predict(test)

submission = pd.DataFrame({"company_id": test_ids, "is_hot_lead": test_predictions})
submission.to_csv("submission.csv", index=False)

print("Submission file saved as submission.csv in runtime memory")


Parameters: { "use_label_encoder" } are not used.



Accuracy: 0.9657
Precision: 0.8683
Recall: 0.9175
F1-Score: 0.8922
Confusion Matrix:
[[3296   86]
 [  51  567]]
Submission file saved as submission.csv in runtime memory
