# Customer Churn Prediction for a Subscription Service
#### BY GOKUL VENU

### Importing libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer




###  Load the dataset

In [2]:
# Direct-download link (Google Drive) to the CSV that backs this notebook.
url = 'https://drive.google.com/uc?id=1-RyWmxPFTyfctnXN1OWX4_9aGHgDfFNn'

# Read the remote CSV into the working DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer=url)

### Drop irrelevant columns

In [3]:
# Remove identifier and location columns that are not used as model features.
# Rebinding `df` (instead of `inplace=True`) together with `errors='ignore'`
# keeps this cell re-runnable: a second execution, when the columns are already
# gone, is a no-op rather than a KeyError.
df = df.drop(
    columns=['Customer ID', 'City', 'Zip Code', 'Latitude', 'Longitude'],
    errors='ignore',
)

### Encode the target variable

In [4]:
# Map each distinct 'Churn Category' value to an integer code. The fitted
# encoder is kept so the codes can be mapped back to the original values.
label_encoder = LabelEncoder()
encoded_target = label_encoder.fit_transform(df['Churn Category'])
df['Churn Category'] = encoded_target

### Define features and target variable

# Target: the encoded 'Churn Category' column.
y = df['Churn Category']

# Features: every remaining column (drop returns a new frame; df is untouched).
X = df.drop(columns=['Churn Category'])

### Define categorical and numerical features

In [6]:
# Columns stored as Python objects are treated as categorical features.
categorical_features = list(X.select_dtypes(include='object').columns)

# 64-bit integer and float columns are treated as numerical features.
numerical_features = list(X.select_dtypes(include=['int64', 'float64']).columns)

 ### Impute missing values

In [7]:
# Fill missing numerical values with the per-column mean.
# NOTE(review): the means are computed over all rows of X, i.e. before any
# train/test split.
imputer = SimpleImputer(strategy='mean')
numerical_filled = imputer.fit_transform(X[numerical_features])
X[numerical_features] = numerical_filled

In [8]:
# Fill missing categorical values with the most frequent value of each column.
# This rebinds `imputer`, replacing the numerical imputer created earlier.
imputer = SimpleImputer(strategy='most_frequent')
categorical_filled = imputer.fit_transform(X[categorical_features])
X[categorical_features] = categorical_filled

###  Create column transformer

In [9]:
# One-hot encode the categorical columns and standardize the numerical ones.
# Columns in neither list are dropped (ColumnTransformer's default remainder).
encoder_step = ('encoder', OneHotEncoder(), categorical_features)
scaler_step = ('scaler', StandardScaler(), numerical_features)
column_transformer = ColumnTransformer(transformers=[encoder_step, scaler_step])

### Fit and transform the column transformer on the features

In [10]:
# Learn the category levels and scaling statistics from X, then produce the
# encoded/scaled feature matrix from the same rows.
column_transformer.fit(X)
X_encoded = column_transformer.transform(X)

###  Split the dataset into training and testing sets (80% train, 20% test)

In [11]:
# Split the raw features first and fit the preprocessing on the training rows
# only, so the encoder's category levels and the scaler's mean/variance are
# never learned from held-out rows (avoids train/test leakage). The same
# random_state and row count yield the same row partition as splitting the
# pre-encoded matrix would.
# NOTE(review): X was already imputed with statistics computed over all rows in
# the earlier imputation cells; that step would need to move after the split
# as well to remove leakage completely.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Held-out rows may contain category levels absent from the training rows;
# encode those as all-zero indicator columns instead of raising at transform.
column_transformer.set_params(encoder__handle_unknown='ignore')

X_train = column_transformer.fit_transform(X_train_raw)
X_test = column_transformer.transform(X_test_raw)

### Initialize and train the RandomForestClassifier

In [12]:
# Fix the forest's random seed so bootstrap sampling and feature selection are
# repeatable; without it the fitted model (and the reported accuracy) changes
# on every run of the notebook.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)

### Make predictions on the test set

In [13]:
# Predicted class codes for the held-out rows.
y_pred = rf_classifier.predict(X=X_test)

### Calculate accuracy

In [14]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.9992902767920511
