In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [19]:
# Step 1: Data Collection
# Location of the purchase-records CSV. This is an absolute, machine-specific path;
# edit it here when running the notebook elsewhere.
IPHONE_CSV_PATH = 'C:/Users/DELL/OneDrive/Desktop/iphone_purchase_records.csv'
iphone_data = pd.read_csv(filepath_or_buffer=IPHONE_CSV_PATH)

In [20]:
# Preview the first rows of the freshly loaded frame as a quick sanity check.
iphone_data.head()

Unnamed: 0,Gender,Age,Salary,Purchase Iphone
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [22]:
# Step 2: Data Preprocessing
# Missing data: discard every row that has at least one missing value.
# (Categorical encoding and scaling are done in the cells that follow.)
iphone_data = iphone_data.dropna(axis=0, how='any')
iphone_data

Unnamed: 0,Gender,Age,Salary,Purchase Iphone
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


In [26]:
# One-hot encode Gender. drop_first=True discards the first category's indicator,
# leaving a single indicator column that replaces the original text column
# (in the recorded output, True corresponds to 'Male').
gender_indicator = pd.get_dummies(iphone_data['Gender'], drop_first=True)
iphone_data['Gender'] = gender_indicator
iphone_data['Gender']

0       True
1       True
2      False
3      False
4       True
       ...  
395    False
396     True
397    False
398     True
399    False
Name: Gender, Length: 400, dtype: bool

In [28]:
# Scaling numerical features
# KNN is distance-based, so Age and Salary are standardised to comparable scales.
numerical_features = ['Age', 'Salary']

# Fit the scaler on the training rows only, so that test-set statistics do not leak
# into preprocessing. train_test_split derives its shuffle from the number of rows
# and random_state, so using the same test_size/random_state as the Step 5 split
# selects the same training rows. Keep these two values in sync with Step 5, and do
# not add or drop rows between this cell and that split.
train_positions = train_test_split(
    np.arange(len(iphone_data)), test_size=0.2, random_state=42
)[0]

scaler = StandardScaler()
scaler.fit(iphone_data.iloc[train_positions][numerical_features])

# Apply the training-set mean/variance to every row (train and test alike).
iphone_data[numerical_features] = scaler.transform(iphone_data[numerical_features])


In [35]:
# Step 3: Exploratory Data Analysis (EDA)
# Pairwise scatter plots of the columns, coloured by the purchase label, with KDE
# curves on the diagonal. The figure is written to disk and then closed, so it is
# not rendered inline.
sns.pairplot(data=iphone_data, hue='Purchase Iphone', diag_kind='kde')
eda_figure = plt.gcf()  # the figure pairplot just created
eda_figure.savefig('eda_pairplot.png')
plt.close(eda_figure)


In [36]:
# Step 4: Feature Engineering
# No specific feature engineering in this example

In [40]:
# Step 5: Model Building (using KNN)
# Separate predictors from the target, then hold out 20% of the rows for testing.
# random_state is fixed so the split is reproducible.
feature_columns = ['Gender', 'Age', 'Salary']
target_column = 'Purchase Iphone'

X = iphone_data[feature_columns]
y = iphone_data[target_column]

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [43]:
# Number of neighbours (k) that vote on each prediction; adjust as needed.
N_NEIGHBORS = 5

knn_model = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn_model.fit(X=X_train, y=y_train)


In [45]:
# Step 6: Model Evaluation
# Predict the purchase label for each held-out test row and display the predictions.
y_pred = knn_model.predict(X=X_test)
y_pred

array([1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0], dtype=int64)

In [46]:
# Compute each metric once on the held-out test set, then print them together.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print('Accuracy:', round(test_accuracy, 2))
print('\nClassification Report:\n', report_text)
print('\nConfusion Matrix:\n', conf_matrix)

Accuracy: 0.93

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.92      0.94        52
           1       0.87      0.93      0.90        28

    accuracy                           0.93        80
   macro avg       0.91      0.93      0.92        80
weighted avg       0.93      0.93      0.93        80


Confusion Matrix:
 [[48  4]
 [ 2 26]]
