In [1]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# Locations used by this script: the file the pickled model is written to,
# and the CSV that is read for training.
model_file = 'model.pkl'
input_file = "C:/Users/ron11/OneDrive/Desktop/Katalyst.csv"  # machine-specific; point this at your own CSV

# Read the whole CSV into a DataFrame.
data = pd.read_csv(input_file)

# Preview the leading rows so the column layout is visible before modelling.
print(data.head())

# Columns used as model inputs. Despite the name, a column listed here is only
# label-encoded when it actually holds text; numeric columns pass through
# untouched. `features` is a second name for the same list, kept so that any
# code relying on it keeps working.
categorical_features = features = [
    'Chat tickets', 'E-mail tickets', 'Call tickets',
    'CT Resolved', 'ET Resolved', 'Call Resolved',
]

# Column the classifier is trained to predict.
target = 'Prediction'

# Fitted encoders keyed by column name, so encoded values can be decoded later.
label_encoders = {}


def _holds_text(series):
    """Return True when *series* has object dtype or a pandas string dtype.

    Comparing ``dtype == 'object'`` alone misses columns stored with pandas'
    dedicated string dtypes, which would then reach the model unencoded.
    """
    return (
        pd.api.types.is_object_dtype(series)
        or pd.api.types.is_string_dtype(series)
    )


# Encode every text-valued feature column, then the target, with one
# LabelEncoder per column. Each column is replaced in `data` by its integer
# codes and the fitted encoder is remembered under the column's name.
# NOTE(review): scikit-learn documents LabelEncoder as intended for target
# values; it is kept here for the feature columns to preserve the existing
# encoding.
for column in [*categorical_features, target]:
    if _holds_text(data[column]):
        le = LabelEncoder()
        data[column] = le.fit_transform(data[column])
        label_encoders[column] = le

# Assemble the label vector and the feature matrix from the prepared frame.
y = data[target]
X = data[categorical_features]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Build a 100-tree random forest and fit it on the training portion.
# fit() returns the estimator itself, so construction and fitting are chained.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict the held-out rows and print the classification report for them.
y_pred = model.predict(X_test)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

# Persist the fitted model as a pickle.
with open(model_file, 'wb') as model_out:
    pickle.dump(model, model_out)

# Read the pickle straight back as a round-trip check. The file was written
# by this script a moment ago; never unpickle a file from an untrusted source.
with open(model_file, 'rb') as model_in:
    loaded_model = pickle.load(model_in)

# Score every row (training and held-out alike) with the reloaded model.
predicted = loaded_model.predict(X)

# If the target was label-encoded, map the integer codes back to the
# original labels before storing them.
target_encoder = label_encoders.get(target)
if target_encoder is not None:
    predicted = target_encoder.inverse_transform(predicted)

# NOTE(review): 'Prediction' is also the training target column, so this
# replaces the original labels in `data` with the model's own output.
data['Prediction'] = predicted

# NOTE(review): this overwrites the CSV the data was loaded from. Any feature
# column that was label-encoded is written back in its encoded form, and a
# later run will train on the predictions stored above.
data.to_csv(input_file, index=False)

print(f'Model has been saved to {model_file}')
print(f'Predictions have been saved to {input_file}')


  Name of Cx                  E-mail        Phone      Plan name  \
0    Atharva       atharva@gmail.com   5959522154         Silver   
1    Shubham       shubham@gmail.com   1956541526         Bronze   
2      Karan         karan@gmail.com   2046439102           Gold   
3    Rishabh  rishabh@rediffmail.com   6049419730  Session Based   
4         Om          om@hotmail.com  10052400358         Silver   

                                 VM Name  No of total users  Chat tickets  \
0  atharva.vdi.hostingcloudapp.com:32411                  2             2   
1  shubham.vdi.hostingcloudapp.com:32412                  1             1   
2    karan.vdi.hostingcloudapp.com:32413                  4             3   
3  rishabh.vdi.hostingcloudapp.com:32414                  1             4   
4       om.vdi.hostingcloudapp.com:32415                  3             2   

   E-mail tickets  Call tickets  CT Resolved  ET Resolved  Call Resolved  \
0               4             2            1        