# Importing Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


# Loading data

In [2]:
data = pd.read_csv("train_data.csv")

# Split the data into a training set and a test set

In [3]:
X_train, X_test, y_train, y_test = train_test_split(data[[
    "age", "gender", "region_category", "membership_category", "joined_through_referral", "preferred_offer_types",
    "medium_of_operation", "internet_option",  "days_since_last_login", "avg_time_spent", "avg_transaction_value",
    "avg_frequency_login_days", "points_in_wallet", "used_special_discount", "offer_application_preference", "past_complaint",
    "complaint_status", "feedback"
]], data["churn_risk_score"], test_size=0.25)


# Train the model

In [4]:
# Create a StandardScaler object
scaler = StandardScaler()

# Fit the scaler to the training data
scaler.fit(X_train)

# Scale the training data
X_train_scaled = scaler.transform(X_train)

# Scale the test data
X_test_scaled = scaler.transform(X_test)

In [5]:
model = LogisticRegression(max_iter=30000)
model.fit(X_train_scaled, y_train)

LogisticRegression(max_iter=30000)

In [6]:
model2 = SVC(kernel='rbf')
model2.fit(X_train_scaled,y_train)

SVC()

In [7]:
model3 = DecisionTreeClassifier()
model3.fit(X_train_scaled,y_train)

DecisionTreeClassifier()

In [8]:
model4 = RandomForestClassifier()
model4.fit(X_train_scaled,y_train)

RandomForestClassifier()

# Predict churn risk scores for the test set

In [15]:
predictions = model.predict(X_test_scaled)
predictions2 = model2.predict(X_test_scaled)
predictions3 = model3.predict(X_test_scaled)
predictions4 = model4.predict(X_test_scaled)


# Calculate the accuracy of the model

In [None]:
accuracy = model.score(X_test, y_test)
accuracy2 = model2.score(X_test, y_test)
accuracy3 = model3.score(X_test, y_test)
accuracy4 = model4.score(X_test, y_test)

# Accuracy

In [17]:
print("The accuracy of the model is:", accuracy)
print("The accuracy of the model2 is:", accuracy2)
print("The accuracy of the model3 is:", accuracy3)
print("The accuracy of the model3 is:", accuracy4)


The accuracy of the model is: 0.07717403532455844
The accuracy of the model2 is: 0.07728653391832602
The accuracy of the model3 is: 0.07852401844976938
The accuracy of the model3 is: 0.08234897063786703


In [19]:
# print(predictions)
# print(predictions2)
# print(predictions3)
# print(predictions4)
# print(y_test)

[5 4 4 ... 4 3 1]
[3 3 3 ... 4 3 1]
[3 3 4 ... 4 4 2]
[3 3 5 ... 4 4 2]
278      3
4387     3
10812    5
3170     3
2546     4
        ..
34444    2
27059    3
2926     4
32120    4
17877    2
Name: churn_risk_score, Length: 8889, dtype: int64


# Prediction on Future Data

In [56]:
prediction_data = pd.read_csv("test.csv")

In [57]:
# Create a StandardScaler object
scaler = StandardScaler()

# Fit the scaler to the training data
scaler.fit(prediction_data)

# Scale the training data
X_data = scaler.transform(prediction_data)

In [58]:
Prediction_X_data = model4.predict(X_data)

In [59]:
print(Prediction_X_data)

[3 3 3 ... 5 4 3]


In [63]:
new_data = pd.DataFrame({
    "predicted_churn_risk": Prediction_X_data,
})

# Append the new data to the existing CSV file
new_data.to_csv("test.csv", mode='a', header='predicted_churn_risk', index=False)