In [1]:
import pandas as pd
import numpy as np
from io import StringIO

In [2]:
# The data as a string
data = """CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
1,Male,19,15,39
2,Male,21,15,81
3,Female,20,16,6
4,Female,23,16,77
5,Female,31,17,40"""

In [3]:
# Use StringIO to read the string as if it were a file
df = pd.read_csv(StringIO(data))

# Display the dataframe
print(df)

   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   19                  15                      39
1           2    Male   21                  15                      81
2           3  Female   20                  16                       6
3           4  Female   23                  16                      77
4           5  Female   31                  17                      40


In [4]:
df.shape

(5, 5)

In [5]:
df.isna().sum()

CustomerID                0
Gender                    0
Age                       0
Annual Income (k$)        0
Spending Score (1-100)    0
dtype: int64

In [6]:
# Define the number of rows
num_rows = 200

# Generate random data
np.random.seed(0)  # For reproducibility

customer_ids = np.arange(1, num_rows + 1)
genders = np.random.choice(['Male', 'Female'], num_rows)
ages = np.random.randint(18, 70, num_rows)
annual_incomes = np.random.randint(15, 150, num_rows)
spending_scores = np.random.randint(1, 101, num_rows)

# Create the DataFrame
data = {
    'CustomerID': customer_ids,
    'Gender': genders,
    'Age': ages,
    'Annual Income (k$)': annual_incomes,
    'Spending Score (1-100)': spending_scores
}

df = pd.DataFrame(data)

# Display the first few rows of the DataFrame
print(df.head())

   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   20                 102                      70
1           2  Female   21                  28                      74
2           3  Female   48                  73                      55
3           4    Male   52                  96                      78
4           5  Female   61                 135                      22


In [7]:
# Rename the column 'AnIncome' to 'Annual Income (k$)'
df.columns = df.columns.str.replace('Annual Income (k$)', 'AnIncome')

df.rename(columns = {'Spending Score (1-100)':'SpendinScore'}, inplace=True)
print(df.head())

   CustomerID  Gender  Age  AnIncome  SpendinScore
0           1    Male   20       102            70
1           2  Female   21        28            74
2           3  Female   48        73            55
3           4    Male   52        96            78
4           5  Female   61       135            22


In [8]:
df.Gender=df.Gender.map({'Male':1, 'Female':0})
print(df.head())

   CustomerID  Gender  Age  AnIncome  SpendinScore
0           1       1   20       102            70
1           2       0   21        28            74
2           3       0   48        73            55
3           4       1   52        96            78
4           5       0   61       135            22


In [9]:
x=df.drop(columns='SpendinScore')
y=df['SpendinScore']

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
xt, xs, yt, ys=train_test_split(x, y, test_size=0.2, random_state=9)
xt.shape

(160, 4)

In [12]:
xt

Unnamed: 0,CustomerID,Gender,Age,AnIncome
146,147,0,20,108
169,170,1,47,68
145,146,1,38,147
55,56,1,62,129
21,22,0,29,41
...,...,...,...,...
56,57,0,42,39
182,183,0,18,140
199,200,1,25,138
92,93,1,27,98


In [20]:
yt

146    71
169    51
145    58
55     25
21     75
       ..
56     34
182    77
199    64
92      4
126    73
Name: SpendinScore, Length: 160, dtype: int64

In [15]:
xt

Unnamed: 0,CustomerID,Gender,Age,AnIncome
146,147,0,20,108
169,170,1,47,68
145,146,1,38,147
55,56,1,62,129
21,22,0,29,41
...,...,...,...,...
56,57,0,42,39
182,183,0,18,140
199,200,1,25,138
92,93,1,27,98


In [16]:
from sklearn import svm

svm_model = svm.SVR()

svm_model.fit(xt, yt)


In [17]:
svm_prediction = svm_model.predict(xs)

In [35]:
from sklearn.metrics import mean_absolute_error, accuracy_score, classification_report,confusion_matrix

svm_mae = mean_absolute_error(ys, svm_prediction)
svm_mae

23.840564969272027

In [25]:
from sklearn.linear_model import LinearRegression

In [27]:
machine=LinearRegression()

In [28]:
machine.fit(xt,yt)

In [29]:
machine.score(xs,ys)

-0.0626752410025464

In [31]:
from sklearn.tree import DecisionTreeClassifier

In [32]:
m2=DecisionTreeClassifier()

In [33]:
m2.fit(xt,yt)

In [36]:
m2_pred=m2.predict(xs)

In [37]:
confusion_matrix(ys,m2_pred)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [38]:
accuracy_score(ys,m2_pred)

0.0

In [39]:
from sklearn.tree import DecisionTreeRegressor

dt_model = DecisionTreeRegressor()

dt_model.fit(xt, yt)

dt_prediction = dt_model.predict(xs)

dt_mae = mean_absolute_error(ys, dt_prediction)

dt_mae

29.85

In [40]:
accuracy_score(ys,dt_prediction)

0.05

In [43]:
confusion_matrix(ys,dt_prediction)[1:3]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])