In [3]:
import pandas as pd

## Part 1 - Loading Data

In [None]:
# Load the customer table; the path is relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='Mall_Customers.csv')

In [None]:
# df.shape is a (row_count, column_count) tuple.
print('The dimension/shape of the file is {}'.format(df.shape))

In [None]:
# Caption and preview in one call; sep='\n' keeps the caption on its own line.
print('The first ten rows are', df.head(10), sep='\n')

## Part 2 - Selecting Rows and Columns

In [7]:
# Selecting with a one-element list keeps the result a DataFrame, so the
# printout carries the 'Gender' column header.
print("The first 15 rows of the 'Gender' column are")
print(df.loc[:, ['Gender']].head(15))

The first 15 rows of the 'Gender' column are
    Gender
0     Male
1     Male
2   Female
3   Female
4   Female
5   Female
6   Female
7   Female
8     Male
9   Female
10    Male
11  Female
12  Female
13  Female
14    Male


In [8]:
# .loc is label-based: [2, 4] are index labels (the 3rd and 5th rows under the
# default 0-based index) and the column slice includes both end labels.
print(
    "The Age and Annual income of the 3rd and 5th customer using 'loc' are",
    df.loc[[2, 4], 'Age':'Annual Income (k$)'],
    sep='\n',
)

The Age and Annual income of the 3rd and 5th customer using 'loc' are
   Age  Annual Income (k$)
2   20                  16
4   31                  17


In [9]:
# Count customers per gender. value_counts() returns a Series indexed by the
# gender label and ordered by descending frequency.
gender_cnt = df['Gender'].value_counts()
print(gender_cnt)
print('\n')
# Look the counts up by label, not by position: integer keys on a
# label-indexed Series rely on a deprecated positional fallback, and the
# positions would silently swap if 'Male' ever became the more frequent value.
print('Female: ', gender_cnt['Female'])
print('Male: ', gender_cnt['Male'])

Gender
Female    112
Male       88
Name: count, dtype: int64


Female:  112
Male:  88


In [10]:
# Take the first 20 rows, then narrow them to the three columns of interest.
print('The age, annual income, and spending score of the 1st 20 customers')
print(df.head(20)[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']])

The age, annual income, and spending score of the 1st 20 customers
    Age  Annual Income (k$)  Spending Score (1-100)
0    19                  15                      39
1    21                  15                      81
2    20                  16                       6
3    23                  16                      77
4    31                  17                      40
5    22                  17                      76
6    35                  18                       6
7    23                  18                      94
8    64                  19                       3
9    30                  19                      72
10   67                  19                      14
11   35                  19                      99
12   58                  20                      15
13   24                  20                      77
14   37                  20                      13
15   22                  20                      79
16   35                  21                      

## Part 3 - Setting, Resetting, and Using Indexes

In [11]:
# Promote CustomerID to the row index.
# The membership check makes the cell safe to re-run: once CustomerID has been
# moved into the index it is no longer a column, and a second set_index call
# would raise KeyError. Rebinding `df` replaces the in-place mutation.
if 'CustomerID' in df.columns:
    df = df.set_index('CustomerID')
df

Unnamed: 0_level_0,Gender,Age,Annual Income (k$),Spending Score (1-100)
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Male,19,15,39
2,Male,21,15,81
3,Female,20,16,6
4,Female,23,16,77
5,Female,31,17,40
...,...,...,...,...
196,Female,35,120,79
197,Female,45,126,28
198,Male,32,126,74
199,Male,32,137,18


In [12]:
# Move CustomerID from the index back to an ordinary column, restoring the
# default integer index.
# Guarded on the index name so that re-running the cell does not insert a
# spurious extra column from the already-default index.
if df.index.name == 'CustomerID':
    df = df.reset_index()
df

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40
...,...,...,...,...,...
195,196,Female,35,120,79
196,197,Female,45,126,28
197,198,Male,32,126,74
198,199,Male,32,137,18


In [13]:
# Rank customers from highest to lowest spending score. sort_values returns a
# new frame, so `df` keeps its original row order.
df2 = df.sort_values('Spending Score (1-100)', ascending=False)
df2

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
11,12,Female,35,19,99
19,20,Female,35,23,98
145,146,Male,28,77,97
185,186,Male,30,99,97
127,128,Male,40,71,95
...,...,...,...,...,...
30,31,Male,60,30,4
32,33,Male,53,33,4
8,9,Male,64,19,3
158,159,Male,34,78,1


## Part 4 – Filtering

In [14]:
print('Customer’s IDs with a spending score lower than 50 points')
# .lt(50) is the element-wise "< 50" test; the resulting boolean mask selects rows.
filtered_df = df.loc[df['Spending Score (1-100)'].lt(50)]
# A one-element column list keeps the displayed result a DataFrame.
filtered_df[['CustomerID']]

Customer’s IDs with a spending score lower than 50 points


Unnamed: 0,CustomerID
0,1
2,3
4,5
6,7
8,9
...,...
190,191
192,193
194,195
196,197


In [15]:
print('Customer’s information with an annual income of more than k$30 and a spending score lower than 50 points')
# Both conditions must hold, so the two element-wise masks are combined with &.
filtered_df = df.loc[
    df['Annual Income (k$)'].gt(30) & df['Spending Score (1-100)'].lt(50)
]
# All columns are displayed; index with a column list
# (e.g. ['CustomerID', 'Gender', 'Age']) to show only a subset.
filtered_df

Customer’s information with an annual income of more than k$30 and a spending score lower than 50 points


Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
32,33,Male,53,33,4
34,35,Female,49,33,14
36,37,Female,42,34,17
38,39,Female,36,37,26
40,41,Female,65,38,35
...,...,...,...,...,...
190,191,Female,34,103,23
192,193,Male,33,113,8
194,195,Female,47,120,16
196,197,Female,45,126,28
