## Import Pandas

In [1]:
import pandas as pd

## Load Data

In [2]:
df_titanic = pd.read_csv('dataset/titanic.csv')

In [3]:
df_titanic

Unnamed: 0,Survived,Pclass,Sex,Age,Siblings/Spouses,Parents/Children,Fare
0,0,3,male,22.0,1,0,7.2500
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.9250
3,1,1,female,35.0,1,0,53.1000
4,0,3,male,35.0,0,0,8.0500
...,...,...,...,...,...,...,...
882,0,2,male,27.0,0,0,13.0000
883,1,1,female,19.0,0,0,30.0000
884,0,3,female,7.0,1,2,23.4500
885,1,1,male,26.0,0,0,30.0000


In [4]:
df_titanic.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Siblings/Spouses,Parents/Children,Fare
0,0,3,male,22.0,1,0,7.25
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.925
3,1,1,female,35.0,1,0,53.1
4,0,3,male,35.0,0,0,8.05


In [5]:
df_titanic.tail()

Unnamed: 0,Survived,Pclass,Sex,Age,Siblings/Spouses,Parents/Children,Fare
882,0,2,male,27.0,0,0,13.0
883,1,1,female,19.0,0,0,30.0
884,0,3,female,7.0,1,2,23.45
885,1,1,male,26.0,0,0,30.0
886,0,3,male,32.0,0,0,7.75


## Filtering Data

In [6]:
selected_columns = ['Survived', 'Pclass', 'Sex', 'Age', 'Fare']

In [7]:
titanic_fix = df_titanic[selected_columns]
titanic_fix.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare
0,0,3,male,22.0,7.25
1,1,1,female,38.0,71.2833
2,1,3,female,26.0,7.925
3,1,1,female,35.0,53.1
4,0,3,male,35.0,8.05


## Describe Data

In [8]:
df_titanic.dtypes

Survived              int64
Pclass                int64
Sex                  object
Age                 float64
Siblings/Spouses      int64
Parents/Children      int64
Fare                float64
dtype: object

In [9]:
df_titanic['Age'] = df_titanic['Age'].astype('int64')
df_titanic.dtypes

Survived              int64
Pclass                int64
Sex                  object
Age                   int64
Siblings/Spouses      int64
Parents/Children      int64
Fare                float64
dtype: object

In [10]:
df_titanic.shape # rows, columns

(887, 7)

In [11]:
print(f"Jumlah Baris: {df_titanic.shape[0]}\nJumlah Kolom: {df_titanic.shape[1]}")

Jumlah Baris: 887
Jumlah Kolom: 7


In [12]:
print(titanic_fix.describe())

         Survived      Pclass         Age       Fare
count  887.000000  887.000000  887.000000  887.00000
mean     0.385569    2.305524   29.471443   32.30542
std      0.487004    0.836662   14.121908   49.78204
min      0.000000    1.000000    0.420000    0.00000
25%      0.000000    2.000000   20.250000    7.92500
50%      0.000000    3.000000   28.000000   14.45420
75%      1.000000    3.000000   38.000000   31.13750
max      1.000000    3.000000   80.000000  512.32920


## Change Columns

In [13]:
df_titanic.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Siblings/Spouses,Parents/Children,Fare
0,0,3,male,22,1,0,7.25
1,1,1,female,38,1,0,71.2833
2,1,3,female,26,0,0,7.925
3,1,1,female,35,1,0,53.1
4,0,3,male,35,0,0,8.05


In [14]:
new_columns = {
    'Survived': 'Selamat',
    'Pclass': 'PCLASS',
    'Sex': 'Gender',
    'Age': 'Umur',
    'Siblings/Spouses': 'SIBSA',
    'Parents/Children': 'PARCA',
    'Fare': 'Tarif'
}

In [15]:
df_titanic = df_titanic.rename(columns=new_columns)

In [16]:
df_titanic.head()

Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif
0,0,3,male,22,1,0,7.25
1,1,1,female,38,1,0,71.2833
2,1,3,female,26,0,0,7.925
3,1,1,female,35,1,0,53.1
4,0,3,male,35,0,0,8.05


## Check Value

In [17]:
df_titanic.isnull()

Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...
882,False,False,False,False,False,False,False
883,False,False,False,False,False,False,False
884,False,False,False,False,False,False,False
885,False,False,False,False,False,False,False


In [18]:
df_titanic.isnull().any()

Selamat    False
PCLASS     False
Gender     False
Umur       False
SIBSA      False
PARCA      False
Tarif      False
dtype: bool

In [19]:
df_titanic.fillna(0).head()

Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif
0,0,3,male,22,1,0,7.25
1,1,1,female,38,1,0,71.2833
2,1,3,female,26,0,0,7.925
3,1,1,female,35,1,0,53.1
4,0,3,male,35,0,0,8.05


## Grouping Data

In [20]:
df_titanic.groupby('Gender').mean()

Unnamed: 0_level_0,Selamat,PCLASS,Umur,SIBSA,PARCA,Tarif
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
female,0.742038,2.159236,27.710191,0.694268,0.649682,44.479818
male,0.190227,2.385689,30.411867,0.43281,0.237347,25.633935


In [21]:
df_titanic.groupby('Korban Selamat').mean()

KeyError: 'Korban Selamat'

In [None]:
df_titanic.groupby('Tarif').mean().head()

In [None]:
# Loc function

In [None]:
df_titanic.head()

In [None]:
get_under20 = df_titanic.loc[df_titanic.Umur <= 20, ['Umur','Gender']]

In [None]:
get_under20

In [None]:
get_female = df_titanic.loc[df_titanic.Gender == 'female', ['Gender','Umur']]
get_female.describe()

In [None]:
get_male = df_titanic.loc[df_titanic.Gender == 'male']
get_male

In [22]:
df_titanic['Male'] = df_titanic['Gender'] == 'male'
df_titanic['Selamat'] = df_titanic['Selamat'] == 0

In [23]:
df_titanic

Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif,Male
0,True,3,male,22,1,0,7.2500,True
1,False,1,female,38,1,0,71.2833,False
2,False,3,female,26,0,0,7.9250,False
3,False,1,female,35,1,0,53.1000,False
4,True,3,male,35,0,0,8.0500,True
...,...,...,...,...,...,...,...,...
882,True,2,male,27,0,0,13.0000,True
883,False,1,female,19,0,0,30.0000,False
884,True,3,female,7,1,2,23.4500,False
885,False,1,male,26,0,0,30.0000,True


In [40]:
df_titanic

Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif,Male
0,True,3,Laki-Laki,22,1,0,7.2500,True
1,False,1,Perempuan,38,1,0,71.2833,False
2,False,3,Perempuan,26,0,0,7.9250,False
3,False,1,Perempuan,35,1,0,53.1000,False
4,True,3,Laki-Laki,35,0,0,8.0500,True
...,...,...,...,...,...,...,...,...
882,True,2,Laki-Laki,27,0,0,13.0000,True
883,False,1,Perempuan,19,0,0,30.0000,False
884,True,3,Perempuan,7,1,2,23.4500,False
885,False,1,Laki-Laki,26,0,0,30.0000,True


In [41]:
df_titanic.loc[(df_titanic.Gender == 'male'), ['Gender']] = "Laki-Laki"
df_titanic.loc[(df_titanic.Gender == 'female'), ['Gender']] = "Perempuan"

In [42]:
df_titanic

Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif,Male
0,True,3,Laki-Laki,22,1,0,7.2500,True
1,False,1,Perempuan,38,1,0,71.2833,False
2,False,3,Perempuan,26,0,0,7.9250,False
3,False,1,Perempuan,35,1,0,53.1000,False
4,True,3,Laki-Laki,35,0,0,8.0500,True
...,...,...,...,...,...,...,...,...
882,True,2,Laki-Laki,27,0,0,13.0000,True
883,False,1,Perempuan,19,0,0,30.0000,False
884,True,3,Perempuan,7,1,2,23.4500,False
885,False,1,Laki-Laki,26,0,0,30.0000,True


In [47]:
df_titanic.loc[(df_titanic.Gender == 'Laki-Laki'), ['Gender']] = True
df_titanic.loc[(df_titanic.Gender == 'Perempuan'), ['Gender']] = False

In [50]:
df_titanic.head()

Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif,Male
0,True,3,True,22,1,0,7.25,True
1,False,1,False,38,1,0,71.2833,False
2,False,3,False,26,0,0,7.925,False
3,False,1,False,35,1,0,53.1,False
4,True,3,True,35,0,0,8.05,True


## Concatenate Data

In [52]:
df_titanic1 = df_titanic[['Selamat','PCLASS']]
df_titanic2 = df_titanic[['Gender','Umur']]
new_data = [df_titanic1, df_titanic2]
titanic_df = pd.concat(new_data, axis=1)
titanic_df

Unnamed: 0,Selamat,PCLASS,Gender,Umur
0,True,3,True,22
1,False,1,False,38
2,False,3,False,26
3,False,1,False,35
4,True,3,True,35
...,...,...,...,...
882,True,2,True,27
883,False,1,False,19
884,True,3,False,7
885,False,1,True,26


In [54]:
print(titanic_df['Umur'].values)

[22 38 26 35 35 27 54  2 27 14  4 58 20 39 14 55  2 23 31 22 35 34 15 28
  8 38 26 19 24 23 40 48 18 66 28 42 18 21 18 14 40 27  3 19 30 20 27 16
 18  7 21 49 29 65 46 21 28  5 11 22 38 45  4 64  7 29 19 17 26 32 16 21
 26 32 25 23 28  0 30 22 29 31 28 17 33 16 20 23 24 29 20 46 26 59 22 71
 23 34 34 28 29 21 33 37 28 21 29 38 28 47 14 22 20 17 21 70 29 24  2 21
 19 32 32 54 12 19 24  2 45 33 20 47 29 25 23 19 37 16 24 40 22 24 19 18
 19 27  9 36 42 51 22 55 40 27 51 16 30 37  5 44 40 26 17  1  9 48 45 60
 28 61  4  1 21 56 18  5 50 30 36  8 39  9  1  4 39 26 45 40 36 32 19 19
  3 44 58 28 42 21 24 28 17 34 45 18  2 32 26 16 40 24 35 22 30 22 31 27
 42 32 30 16 27 51 22 38 22 19 20 18 12 35 29 59  5 24 21 44  8 19 33 19
 18 29 22 30 44 25 24 37 54 18 29 62 30 41 29 38 30 35 50  3 52 40 21 36
 16 25 58 35 28 25 41 37 33 63 45 21  7 35 65 28 16 19 57 33 30 22 42 22
 26 19 36 24 24 30 23  2 47 50 20 24 19 46 28  0 42 17 30 30 24 18 26 28
 43 26 24 54 31 40 22 27 30 22 20 36 61 36 31 16 28

In [55]:
df_titanic

Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif,Male
0,True,3,True,22,1,0,7.2500,True
1,False,1,False,38,1,0,71.2833,False
2,False,3,False,26,0,0,7.9250,False
3,False,1,False,35,1,0,53.1000,False
4,True,3,True,35,0,0,8.0500,True
...,...,...,...,...,...,...,...,...
882,True,2,True,27,0,0,13.0000,True
883,False,1,False,19,0,0,30.0000,False
884,True,3,False,7,1,2,23.4500,False
885,False,1,True,26,0,0,30.0000,True


In [57]:
df_titanic.to_csv('dataset/titanic.csv')

In [58]:
df = pd.read_csv('dataset/titanic.csv')

In [59]:
df

Unnamed: 0.1,Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif,Male
0,0,True,3,True,22,1,0,7.2500,True
1,1,False,1,False,38,1,0,71.2833,False
2,2,False,3,False,26,0,0,7.9250,False
3,3,False,1,False,35,1,0,53.1000,False
4,4,True,3,True,35,0,0,8.0500,True
...,...,...,...,...,...,...,...,...,...
882,882,True,2,True,27,0,0,13.0000,True
883,883,False,1,False,19,0,0,30.0000,False
884,884,True,3,False,7,1,2,23.4500,False
885,885,False,1,True,26,0,0,30.0000,True


In [60]:
df_titanic.to_csv('dataset/titanic_fix.csv')

In [62]:
new_df = pd.read_csv('dataset/titanic_fix.csv')
new_df

Unnamed: 0.1,Unnamed: 0,Selamat,PCLASS,Gender,Umur,SIBSA,PARCA,Tarif,Male
0,0,True,3,True,22,1,0,7.2500,True
1,1,False,1,False,38,1,0,71.2833,False
2,2,False,3,False,26,0,0,7.9250,False
3,3,False,1,False,35,1,0,53.1000,False
4,4,True,3,True,35,0,0,8.0500,True
...,...,...,...,...,...,...,...,...,...
882,882,True,2,True,27,0,0,13.0000,True
883,883,False,1,False,19,0,0,30.0000,False
884,884,True,3,False,7,1,2,23.4500,False
885,885,False,1,True,26,0,0,30.0000,True


## Data Kelas

In [73]:
kelas = pd.read_excel('dataset/kelas ml.xlsx')

In [74]:
kelas

Unnamed: 0,No,Nama Mentee,Asal Daerah,Kelompok
0,1,Tafia Alifianty Dinita Putri,Bandung,Alpha
1,2,igor thaddeus sampoerna,jakarta,Alpha
2,3,Rizki Aldiansyah,Bandung,Alpha
3,4,Arfie Nugraha,Medan,Alpha
4,5,Andhika Putri Nur Firdauzi,Klaten,Alpha
5,6,M Harry Prawiro,Bogor,Alpha
6,7,Made Dwija Mahardika,Kota Denpasar,Alpha
7,8,Muhamad Ilman Sukarsa,Bandung,Alpha
8,9,JAKA SULISTIAWAN,Tangerang,Beta
9,10,M Luthfi Yusrizal,Karawang,Beta


In [75]:
alpha = kelas.loc[kelas.Kelompok == 'Alpha']
beta = kelas.loc[kelas.Kelompok == 'Beta']

In [76]:
alpha

Unnamed: 0,No,Nama Mentee,Asal Daerah,Kelompok
0,1,Tafia Alifianty Dinita Putri,Bandung,Alpha
1,2,igor thaddeus sampoerna,jakarta,Alpha
2,3,Rizki Aldiansyah,Bandung,Alpha
3,4,Arfie Nugraha,Medan,Alpha
4,5,Andhika Putri Nur Firdauzi,Klaten,Alpha
5,6,M Harry Prawiro,Bogor,Alpha
6,7,Made Dwija Mahardika,Kota Denpasar,Alpha
7,8,Muhamad Ilman Sukarsa,Bandung,Alpha


In [77]:
beta

Unnamed: 0,No,Nama Mentee,Asal Daerah,Kelompok
8,9,JAKA SULISTIAWAN,Tangerang,Beta
9,10,M Luthfi Yusrizal,Karawang,Beta
10,11,Muhammad Abdul Jabbar,Makassar,Beta
11,12,Shalsabilla Varin Ramadhanti,Pekanbaru,Beta
12,13,Petra Yohana Sitanggang,Balikpapan,Beta
13,14,Fauzan Ihza Fajar,Depok,Beta
14,15,Rafli Syawal,Bogor,Beta
15,16,Muhammad Ilham Akbar S,Lamongan,Beta
16,17,Abdul Muiz Anggit Budiyantoyo,Bekasi,Beta
