In [1]:
import numpy as np
import pandas as pd

In [2]:
# Ratings that six viewers gave to two movies; rows are labelled by viewer name.
df_movie_rating = pd.DataFrame(
    data={
        'movie 1': [5, 4, 3, 3, 2, 1],
        'movie 2': [4, 5, 2, 3, 4, 2],
    },
    index=['Tom', 'Jeff', 'Peter', 'Ram', 'Ted', 'Paul'],
)

In [3]:
# Show the full ratings table (six viewers, two movies).
df_movie_rating


Unnamed: 0,movie 1,movie 2
Tom,5,4
Jeff,4,5
Peter,3,2
Ram,3,3
Ted,2,4
Paul,1,2


In [4]:
def movie_grade(rating):
    """Translate a numeric movie rating into a letter grade.

    5 -> 'A', 4 -> 'B', 3 -> 'C'. Every other value (including the
    ratings 1 and 2) falls through to 'F'.
    """
    # Checked in the same order, and with the same == comparison, as a
    # plain if-chain would use.
    for score, letter in ((5, 'A'), (4, 'B'), (3, 'C')):
        if rating == score:
            return letter
    return 'F'

In [5]:
# Spot-check the grading function: the top rating of 5 should print A.
print(movie_grade(5))

A


In [6]:
# Convert every rating in the table to its letter grade.
# DataFrame.applymap is deprecated since pandas 2.1 (it emits a FutureWarning),
# so each column is mapped element-wise with Series.map instead. That form
# works on both old and new pandas and produces the same table.
df_movie_rating.apply(lambda ratings: ratings.map(movie_grade))

Unnamed: 0,movie 1,movie 2
Tom,A,B
Jeff,B,A
Peter,C,F
Ram,C,C
Ted,F,B
Paul,F,F


In [7]:
# Scores of six students on two tests; rows are labelled by student name.
df_test_scores = pd.DataFrame(
    data={
        'Test1': [95, 84, 72, 88, 82, 61],
        'Test2': [74, 85, 82, 73, 77, 79],
    },
    index=['Jack', 'Lewis', 'Patrick', 'Rich', 'Kelly', 'Paula'],
)

In [8]:
# Highest score on each test (column-wise maximum).
df_test_scores.max()

Test1    95
Test2    85
dtype: int64

In [9]:
# Average score on each test (column-wise mean).
df_test_scores.mean()

Test1    80.333333
Test2    78.333333
dtype: float64

In [10]:
# Spread of each test's scores. The values shown correspond to the sample
# standard deviation (n - 1 in the denominator), not the population one.
df_test_scores.std()

Test1    12.110601
Test2     4.633213
dtype: float64

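## Data Standardization

Each test is rescaled to z-scores, $z = (x - \bar{x}) / s$, so that scores from tests with different means and spreads can be compared directly.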

In [13]:
def standardize_test(test):
    """Rescale one test's scores to zero mean and unit standard deviation.

    Parameters
    ----------
    test : object supporting ``-``, ``/``, ``.mean()`` and ``.std()``
        In this notebook, a pandas Series holding one test's scores.

    Returns
    -------
    The scores minus their mean, divided by their standard deviation,
    as computed by ``test.mean()`` and ``test.std()``.
    """
    center = test.mean()
    spread = test.std()
    return (test - center) / spread

In [15]:
# Standardize a single column: the Test1 scores, centred on their mean and
# scaled by their standard deviation.
standardize_test(df_test_scores['Test1'])

Jack       1.211060
Lewis      0.302765
Patrick   -0.688102
Rich       0.633054
Kelly      0.137620
Paula     -1.596397
Name: Test1, dtype: float64

In [16]:
def standardize_test_scores(datafrm):
    """Standardize every column of a DataFrame of test scores.

    Each column is handed on its own to ``standardize_test``, so every
    test is rescaled using its own mean and standard deviation.
    """
    # axis=0 (the default) applies the function column by column.
    standardized = datafrm.apply(standardize_test, axis=0)
    return standardized

In [17]:
# Standardize both tests at once; each column is rescaled independently.
standardize_test_scores(df_test_scores)

Unnamed: 0,Test1,Test2
Jack,1.21106,-0.935276
Lewis,0.302765,1.438886
Patrick,-0.688102,0.791387
Rich,0.633054,-1.151109
Kelly,0.13762,-0.287777
Paula,-1.596397,0.143889


## Data Operations

In [22]:
# Roster of the math class: student name and ID.
df_student_math = pd.DataFrame(
    data={
        'student': ['Tom', 'Jack', 'Dan', 'Ram', 'Jeff', 'David'],
        'ID': [10, 56, 31, 85, 9, 22],
    }
)

In [23]:
# Roster of the science class: student name and ID.
df_student_science = pd.DataFrame(
    data={
        'student': ['Tom', 'Ram', 'David'],
        'ID': [10, 12, 22],
    }
)

In [24]:
# With no `on=` given, merge joins on every column the two frames share
# (here both 'student' and 'ID'). Ram drops out of the result because his
# ID differs between the two rosters (85 vs 12).
pd.merge(df_student_math,df_student_science)

Unnamed: 0,student,ID
0,Tom,10
1,David,22


In [26]:
# Return the students present in both datasets, matching on name only.
# 'ID' is not a join key here, so each frame's ID is kept side by side as
# ID_x (math) and ID_y (science), which exposes Ram's mismatched IDs.

pd.merge(df_student_math,df_student_science, on='student')

Unnamed: 0,student,ID_x,ID_y
0,Tom,10,10
1,Ram,85,12
2,David,22,22


In [27]:
# Keep every math student and look up the science student with the same ID;
# math students with no matching science ID get 'X' in place of the missing name.
df_student_math.merge(df_student_science, how='left', on='ID').fillna('X')

Unnamed: 0,student_x,ID,student_y
0,Tom,10,Tom
1,Jack,56,X
2,Dan,31,X
3,Ram,85,X
4,Jeff,9,X
5,David,22,David


In [28]:
# Stack the two rosters vertically. ignore_index=True renumbers the rows
# 0..8; students enrolled in both classes therefore appear twice.
pd.concat([df_student_math, df_student_science],ignore_index=True)

Unnamed: 0,student,ID
0,Tom,10
1,Jack,56
2,Dan,31
3,Ram,85
4,Jeff,9
5,David,22
6,Tom,10
7,Ram,12
8,David,22


In [29]:
# Survey responses; Tom and Jack each submitted twice.
df_student_survey_data = pd.DataFrame(
    data={
        'student': ['Tom', 'Jack', 'Tom', 'Ram', 'Jeff', 'Jack'],
        'ID': [10, 56, 10, 85, 9, 56],
    }
)


In [30]:
# Show the raw survey responses, including the repeated rows.
df_student_survey_data


Unnamed: 0,student,ID
0,Tom,10
1,Jack,56
2,Tom,10
3,Ram,85
4,Jeff,9
5,Jack,56


In [32]:
# Flag rows that repeat an earlier row (all columns compared). The first
# occurrence stays False; only the later copies are marked True.
df_student_survey_data.duplicated()

0    False
1    False
2     True
3    False
4    False
5     True
dtype: bool

In [33]:
# Keep only the first response per student name.
df_student_survey_data.drop_duplicates(subset='student')

Unnamed: 0,student,ID
0,Tom,10
1,Jack,56
3,Ram,85
4,Jeff,9


In [34]:
# Keep only the first response per student ID.
df_student_survey_data.drop_duplicates(subset='ID')

Unnamed: 0,student,ID
0,Tom,10
1,Jack,56
3,Ram,85
4,Jeff,9
