In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the customer table from a CSV located relative to the working directory.
data = pd.read_csv('Mall_customers.csv')

In [3]:
# Preview the first five rows of the table.
print(data.head(n=5))

   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   19                  15                      39
1           2    Male   21                  15                      81
2           3  Female   20                  16                       6
3           4  Female   23                  16                      77
4           5  Female   31                  17                      40


In [4]:
# Preview the last five rows of the table.
print(data.tail(n=5))

     CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
195         196  Female   35                 120                      79
196         197  Female   45                 126                      28
197         198    Male   32                 126                      74
198         199    Male   32                 137                      18
199         200    Male   30                 137                      83


# checking data type

In [5]:
# Confirm what kind of object was loaded, then summarise its columns.
print(type(data))
# info() writes its report straight to stdout and returns None, so it must not
# be wrapped in print() -- doing so appends a stray "None" line to the output.
data.info()

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
CustomerID                200 non-null int64
Gender                    200 non-null object
Age                       200 non-null int64
Annual Income (k$)        200 non-null int64
Spending Score (1-100)    200 non-null int64
dtypes: int64(4), object(1)
memory usage: 7.9+ KB
None


In [6]:
# Summary statistics for every column; include="all" also reports the
# non-numeric Gender column (unique/top/freq rows in the output).
print(data.describe(include="all"))

        CustomerID  Gender         Age  Annual Income (k$)  \
count   200.000000     200  200.000000          200.000000   
unique         NaN       2         NaN                 NaN   
top            NaN  Female         NaN                 NaN   
freq           NaN     112         NaN                 NaN   
mean    100.500000     NaN   38.850000           60.560000   
std      57.879185     NaN   13.969007           26.264721   
min       1.000000     NaN   18.000000           15.000000   
25%      50.750000     NaN   28.750000           41.500000   
50%     100.500000     NaN   36.000000           61.500000   
75%     150.250000     NaN   49.000000           78.000000   
max     200.000000     NaN   70.000000          137.000000   

        Spending Score (1-100)  
count               200.000000  
unique                     NaN  
top                        NaN  
freq                       NaN  
mean                 50.200000  
std                  25.823522  
min                   1.

# reading features

In [8]:
# Show the annual-income column on its own.
print(data.loc[:, 'Annual Income (k$)'])

0       15
1       15
2       16
3       16
4       17
      ... 
195    120
196    126
197    126
198    137
199    137
Name: Annual Income (k$), Length: 200, dtype: int64


In [9]:
# Show the spending-score column on its own.
print(data.loc[:, 'Spending Score (1-100)'])

0      39
1      81
2       6
3      77
4      40
       ..
195    79
196    28
197    74
198    18
199    83
Name: Spending Score (1-100), Length: 200, dtype: int64


In [10]:
# Pull out the CustomerID column and keep only the rows whose ID exceeds 50.
sl_column = data.loc[:, 'CustomerID']
print(data.loc[sl_column.gt(50)])

     CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
50           51  Female   49                  42                      52
51           52    Male   33                  42                      60
52           53  Female   31                  43                      54
53           54    Male   59                  43                      60
54           55  Female   50                  43                      45
..          ...     ...  ...                 ...                     ...
195         196  Female   35                 120                      79
196         197  Female   45                 126                      28
197         198    Male   32                 126                      74
198         199    Male   32                 137                      18
199         200    Male   30                 137                      83

[150 rows x 5 columns]


# label encoding

In [13]:
# Grab the Gender column (still string labels at this point) and display it.
Gender_col = data.loc[:, 'Gender']
print(Gender_col)

0        Male
1        Male
2      Female
3      Female
4      Female
        ...  
195    Female
196    Female
197      Male
198      Male
199      Male
Name: Gender, Length: 200, dtype: object


In [14]:
# Collapse the column into a set to list the distinct labels it contains.
print(set(Gender_col))

{'Female', 'Male'}


In [15]:
# Per-column replacement table: string gender labels -> integer codes.
encoding_map = {'Gender': {'Male': 0, 'Female': 1}}
# Rebind `data` rather than mutating it with inplace=True, and cast the column
# explicitly: relying on replace() to downcast object -> int is deprecated in
# recent pandas, which would otherwise leave Gender as an object column.
# Re-running this cell is harmless (no labels left to replace, cast is a no-op).
data = data.replace(encoding_map).astype({'Gender': 'int64'})

In [17]:
# Re-read the column from `data` so Gender_col refers to the encoded values.
Gender_col = data['Gender']

In [18]:
# Count how many rows carry each gender code.
print(Gender_col.value_counts())

1    112
0     88
Name: Gender, dtype: int64


In [19]:
# Keep only the rows whose encoded gender equals 1 ('Female' in encoding_map).
print(data.loc[Gender_col.eq(1)])

     CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
2             3       1   20                  16                       6
3             4       1   23                  16                      77
4             5       1   31                  17                      40
5             6       1   22                  17                      76
6             7       1   35                  18                       6
..          ...     ...  ...                 ...                     ...
191         192       1   32                 103                      69
193         194       1   38                 113                      91
194         195       1   47                 120                      16
195         196       1   35                 120                      79
196         197       1   45                 126                      28

[112 rows x 5 columns]


# preparing dataset for model



In [20]:

# Column names used as model inputs, and the single target column.
features = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']
labels = ['Gender']

# Both selections use a list of names, so x and y are DataFrames
# (y is a one-column frame, not a Series).
x = data.loc[:, features]
y = data.loc[:, labels]

In [21]:
# Display the feature frame selected above.
print(x)

     Age  Annual Income (k$)  Spending Score (1-100)
0     19                  15                      39
1     21                  15                      81
2     20                  16                       6
3     23                  16                      77
4     31                  17                      40
..   ...                 ...                     ...
195   35                 120                      79
196   45                 126                      28
197   32                 126                      74
198   32                 137                      18
199   30                 137                      83

[200 rows x 3 columns]


In [22]:
# Display the one-column label frame selected above.
print(y)

     Gender
0         0
1         0
2         1
3         1
4         1
..      ...
195       1
196       1
197       0
198       0
199       0

[200 rows x 1 columns]


In [23]:
# Convert the selections to NumPy arrays. Deriving them from `data` (instead of
# from x/y themselves) keeps this cell re-runnable: the self-referential form
# `x = x.values` fails on a second run because an ndarray has no `.values`.
x = data[features].to_numpy()
y = data[labels].to_numpy()

In [24]:
# Emit the container type, a 30-dash separator and the array itself,
# one item per line.
print(type(x), '-' * 30, x, sep='\n')

<class 'numpy.ndarray'>
------------------------------
[[ 19  15  39]
 [ 21  15  81]
 [ 20  16   6]
 [ 23  16  77]
 [ 31  17  40]
 [ 22  17  76]
 [ 35  18   6]
 [ 23  18  94]
 [ 64  19   3]
 [ 30  19  72]
 [ 67  19  14]
 [ 35  19  99]
 [ 58  20  15]
 [ 24  20  77]
 [ 37  20  13]
 [ 22  20  79]
 [ 35  21  35]
 [ 20  21  66]
 [ 52  23  29]
 [ 35  23  98]
 [ 35  24  35]
 [ 25  24  73]
 [ 46  25   5]
 [ 31  25  73]
 [ 54  28  14]
 [ 29  28  82]
 [ 45  28  32]
 [ 35  28  61]
 [ 40  29  31]
 [ 23  29  87]
 [ 60  30   4]
 [ 21  30  73]
 [ 53  33   4]
 [ 18  33  92]
 [ 49  33  14]
 [ 21  33  81]
 [ 42  34  17]
 [ 30  34  73]
 [ 36  37  26]
 [ 20  37  75]
 [ 65  38  35]
 [ 24  38  92]
 [ 48  39  36]
 [ 31  39  61]
 [ 49  39  28]
 [ 24  39  65]
 [ 50  40  55]
 [ 27  40  47]
 [ 29  40  42]
 [ 31  40  42]
 [ 49  42  52]
 [ 33  42  60]
 [ 31  43  54]
 [ 59  43  60]
 [ 50  43  45]
 [ 47  43  41]
 [ 51  44  50]
 [ 69  44  46]
 [ 27  46  51]
 [ 53  46  46]
 [ 70  46  56]
 [ 19  46  55]
 [ 67  47  52]


# Example: multiply all values of the matrix

In [25]:


# Element-wise doubling of every entry; x itself is left unchanged.
np.multiply(x, 2)

array([[ 38,  30,  78],
       [ 42,  30, 162],
       [ 40,  32,  12],
       [ 46,  32, 154],
       [ 62,  34,  80],
       [ 44,  34, 152],
       [ 70,  36,  12],
       [ 46,  36, 188],
       [128,  38,   6],
       [ 60,  38, 144],
       [134,  38,  28],
       [ 70,  38, 198],
       [116,  40,  30],
       [ 48,  40, 154],
       [ 74,  40,  26],
       [ 44,  40, 158],
       [ 70,  42,  70],
       [ 40,  42, 132],
       [104,  46,  58],
       [ 70,  46, 196],
       [ 70,  48,  70],
       [ 50,  48, 146],
       [ 92,  50,  10],
       [ 62,  50, 146],
       [108,  56,  28],
       [ 58,  56, 164],
       [ 90,  56,  64],
       [ 70,  56, 122],
       [ 80,  58,  62],
       [ 46,  58, 174],
       [120,  60,   8],
       [ 42,  60, 146],
       [106,  66,   8],
       [ 36,  66, 184],
       [ 98,  66,  28],
       [ 42,  66, 162],
       [ 84,  68,  34],
       [ 60,  68, 146],
       [ 72,  74,  52],
       [ 40,  74, 150],
       [130,  76,  70],
       [ 48,  76