In [2]:
# Step 1: Upload the CSV file through the Colab file picker
from google.colab import files
uploaded = files.upload()

# Step 2: Import pandas library
import pandas as pd

# Step 3: Load the uploaded CSV file into a pandas DataFrame
# `uploaded` is keyed by file name; fail with a clear message when it is empty
# instead of the IndexError that indexing an empty key list would raise.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a CSV file.")
# Only the first uploaded file is processed, as before.
file_name = next(iter(uploaded))
raw_df = pd.read_csv(file_name)

# Step 4: Drop the columns listed below
# drop() returns a new frame, so `raw_df` stays intact and re-running this part
# of the cell does not fail on columns that were already removed in place.
columns_to_drop = [
    'Age', 'Gender', 'Item Purchased', 'Location', 'Size', 'Color',
    'Shipping Type', 'Discount Applied', 'Promo Code Used',
    'Previous Purchases', 'Payment Method', 'Frequency of Purchases',
]
df = raw_df.drop(columns=columns_to_drop)

# Step 5: Save the trimmed DataFrame to a CSV file (without the index column)
modified_file_name = 'pps1.csv'
df.to_csv(modified_file_name, index=False)

# Step 6: Download the trimmed CSV file to your local machine
files.download(modified_file_name)


Saving PPS.csv to PPS.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Load the dataset
file_path = 'pps1.csv'
data = pd.read_csv(file_path)

# Step 1: Data Preprocessing

# Encoding categorical variables as integer codes, one encoder per column.
# The fitted encoders stay in scope because predict_subscription() reuses the
# Category and Season encoders to encode user input.
# NOTE(review): these integer codes are fed to a distance-based model; consider
# one-hot encoding Category/Season if their code order carries no meaning.
label_encoder_category = LabelEncoder()
label_encoder_season = LabelEncoder()
label_encoder_subscription = LabelEncoder()

data['Category'] = label_encoder_category.fit_transform(data['Category'])
data['Season'] = label_encoder_season.fit_transform(data['Season'])
data['Subscription Status'] = label_encoder_subscription.fit_transform(data['Subscription Status'])

# Define features and target variable
# 'Customer ID' is excluded from the features along with the target itself.
X = data.drop(columns=['Customer ID', 'Subscription Status'])
y = data['Subscription Status']

# Split the data into training (70%) and testing (30%) sets; fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Feature scaling
# The scaler is fitted on the training rows only and then applied to the test
# rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 2: Model Selection
# Initialize k-NN classifier with k=5 (you can adjust this value)
knn_model = KNeighborsClassifier(n_neighbors=5)

# Step 3: Model Training
knn_model.fit(X_train, y_train)

# Step 4: Model Evaluation
y_pred = knn_model.predict(X_test)

# When a metric is undefined (e.g. a class is never predicted) report it as 0.
# The previous value of 1 would have shown such a metric as a perfect score and
# inflated the averages; 0 still suppresses the undefined-metric warning.
ZERO_DIVISION_VALUE = 0
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=ZERO_DIVISION_VALUE))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print(f"Precision: {precision_score(y_test, y_pred, zero_division=ZERO_DIVISION_VALUE):.2f}")
print(f"Recall: {recall_score(y_test, y_pred, zero_division=ZERO_DIVISION_VALUE):.2f}")
print(f"F1 Score: {f1_score(y_test, y_pred, zero_division=ZERO_DIVISION_VALUE):.2f}")

# Step 5: User Input for New Prediction
def _read_number(prompt, label):
    """Prompt for a finite number; print an error and return None if the input is not one."""
    import math

    raw = input(prompt).strip()
    try:
        value = float(raw)
    except ValueError:
        value = None
    # float() accepts 'nan' and 'inf'; reject them here so they never reach the model.
    if value is None or not math.isfinite(value):
        print(f"Error: {label} must be a number, got '{raw}'.")
        return None
    return value


def predict_subscription():
    """Prompt for one purchase's details and print the predicted subscription status.

    Reads category, purchase amount, season and review rating from standard
    input, encodes the two text fields with the module-level
    ``label_encoder_category`` / ``label_encoder_season``, scales the row with
    ``scaler`` and classifies it with ``knn_model``.

    Prints an error message and returns early (without predicting) when a
    numeric field cannot be parsed as a finite number, or when the category or
    season is not among the classes the encoders were fitted on.

    Returns:
        None. The outcome is reported with ``print``.
    """
    # Input from user. Text fields are stripped and title-cased so that input
    # such as " clothing " is compared as "Clothing".
    category = input("Enter product category (e.g., Clothing, Footwear): ").strip().title()
    purchase_amount = _read_number("Enter purchase amount (USD): ", "Purchase amount")
    if purchase_amount is None:
        return
    season = input("Enter season (e.g., Winter, Spring): ").strip().title()
    review_rating = _read_number("Enter review rating (e.g., 3.5): ", "Review rating")
    if review_rating is None:
        return

    # Check if category and season are in the original training data
    if category not in label_encoder_category.classes_ or season not in label_encoder_season.classes_:
        print(f"Error: Category '{category}' or Season '{season}' not found in training data.")
        return

    # Encode and scale the input
    category_encoded = label_encoder_category.transform([category])[0]
    season_encoded = label_encoder_season.transform([season])[0]

    # NOTE(review): the value order must match the column order the scaler was
    # fitted on — assumed to be category, purchase amount, season, review rating;
    # verify against the training frame.
    input_data = [[category_encoded, purchase_amount, season_encoded, review_rating]]
    input_data_scaled = scaler.transform(input_data)

    # Predict subscription status
    prediction = knn_model.predict(input_data_scaled)

    # Code 1 is treated as "subscribed" — presumably the encoded positive label
    # of 'Subscription Status'; TODO confirm against label_encoder_subscription.classes_.
    if prediction[0] == 1:
        print("Prediction: The customer will subscribe.")
    else:
        print("Prediction: The customer will not subscribe.")

# Run one interactive prediction: the call blocks on input() prompts and prints the result
predict_subscription()


Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.87      0.79       834
           1       0.29      0.13      0.18       336

    accuracy                           0.66      1170
   macro avg       0.50      0.50      0.48      1170
weighted avg       0.59      0.66      0.61      1170

Accuracy: 0.66
Precision: 0.29
Recall: 0.13
F1 Score: 0.18
Enter product category (e.g., Clothing, Footwear): Clothing
Enter purchase amount (USD): 30
Enter season (e.g., Winter, Spring): Winter
Enter review rating (e.g., 3.5): 2.5
Prediction: The customer will not subscribe.


