In [1]:
# standard imports
import pandas as pd
import numpy as np

# Pandas tutorial (based on: https://www.youtube.com/watch?v=PcvsOaixUh8)
## Series

In [4]:
# Values for the Series and the explicit integer labels used as its index.
list_1, labels = ['a', 'b', 'c', 'd'], [1, 2, 3, 4]
ser_1 = pd.Series(list_1, index=labels)
# print() writes the plain-text repr; the bare last expression is rendered as the cell output.
print(ser_1)
ser_1

1    a
2    b
3    c
4    d
dtype: object


1    a
2    b
3    c
4    d
dtype: object

In [9]:
# Person record as a plain dict; the Series built from it is addressed by these keys.
dict_1 = {
    'f_name': "daniel",
    'l_name': "schmied",
    'age': 38,
}
ser_2 = pd.Series(data=dict_1, name='personInfo')
ser_2['f_name']  # label lookup; not shown, because only a cell's last expression is displayed
ser_2.name

'personInfo'

## Dataframes

In [15]:
# 2x3 array of random integers drawn from [10, 50). No seed is set in this cell,
# so the values differ between runs unless a seed is set elsewhere.
rand_arr = np.random.randint(low=10, high=50, size=(2, 3))
df_1 = pd.DataFrame(
    data=rand_arr,
    index=['Row_1', 'Row_2'],
    columns=['Col_1', 'Col_2', 'Col_3'],
)
df_1

Unnamed: 0,Col_1,Col_2,Col_3
Row_1,28,28,14
Row_2,46,35,32


In [26]:
# Row labels (person names) shared by every column.
persons_df = ['Daniel', 'Claudia', 'Tim']
# Maps a short attribute key to the column header used in the frame.
person_attributes = {'weight': "Weight [kg]", 'height': "Height [cm]"}
weights_df = [80, 60, 25]
heights_df = [180, 160, 125]

# One Series per column, each indexed by person name.
dict_df = {
    person_attributes['weight']: pd.Series(data=weights_df, index=persons_df),
    person_attributes['height']: pd.Series(data=heights_df, index=persons_df),
}
df_2 = pd.DataFrame(data=dict_df)
df_2

Unnamed: 0,Weight [kg],Height [cm]
Daniel,80,180
Claudia,60,160
Tim,25,125


## Editing & retrieving Data

In [29]:
# Selecting with a list of column labels (note the double brackets) yields a
# DataFrame containing just that column, rather than a Series.
df_2[[person_attributes['weight']]]

Unnamed: 0,Weight [kg]
Daniel,80
Claudia,60
Tim,25


In [30]:
# .loc selects by index label: returns the row labelled 'Daniel' as a Series.
df_2.loc['Daniel']

Weight [kg]     80
Height [cm]    180
Name: Daniel, dtype: int64

In [31]:
# .iloc selects by integer position: position 0 is the first row.
df_2.iloc[0]

Weight [kg]     80
Height [cm]    180
Name: Daniel, dtype: int64

In [46]:
# BMI = weight [kg] / (height [m])^2. Heights are stored in cm, hence the 0.01 factor.
# Assigning to df_2['BMI'] adds (or overwrites) the column on df_2 itself.
df_2['BMI'] = df_2[person_attributes['weight']].div(
    df_2[person_attributes['height']].mul(0.01).pow(2)
)
df_2

Unnamed: 0,Weight [kg],Height [cm],BMI
Daniel,80,180,24.691358
Claudia,60,160,23.4375
Tim,25,125,16.0


## Conditional selection

In [47]:
# Random 2x3 integer array with entries in [10, 50); unseeded in this cell,
# so the values vary from run to run unless a seed is set elsewhere.
array_cs = np.random.randint(low=10, high=50, size=(2, 3))
array_cs

array([[13, 32, 20],
       [33, 10, 41]])

In [48]:
# Wrap the random array in a labelled frame for the conditional-selection examples.
df_cs = pd.DataFrame(
    data=array_cs,
    index=['row1', 'row2'],
    columns=['col1', 'col2', 'col3'],
)
df_cs

Unnamed: 0,col1,col2,col3
row1,13,32,20
row2,33,10,41


In [50]:
# DataFrame.gt(40) compares element-wise (equivalent to df_cs > 40) and returns
# a boolean frame of the same shape.
# NOTE(review): the printed label says "then"; "than" is presumably intended.
print("Greater then 40 \n", df_cs.gt(40))

Greater then 40 
        col1   col2   col3
row1  False  False  False
row2  False  False   True


In [59]:
# Boolean mask: True where the value exceeds 40.
cond = df_cs.gt(40)
# Indexing with a boolean frame keeps the matching cells and fills the rest with NaN
# (which is why the surviving values are displayed as floats).
df_cs[cond]

Unnamed: 0,col1,col2,col3
row1,,,
row2,,,41.0


### Ways of accessing rows or columns of a DataFrame

In [61]:
# Set up a small example frame: one row per person, one column per attribute.
persons_df_local = ['Daniel', 'Claudia', 'Tim']
person_attributes_local = {'weight': "Weight [kg]", 'height': "Height [cm]"}
weights_df_local = [80, 60, 25]
heights_df_local = [180, 160, 125]

# Pair each attribute key with its values, then build one name-indexed Series per column.
dict_df_local = {
    person_attributes_local[attr_key]: pd.Series(attr_values, index=persons_df_local)
    for attr_key, attr_values in (
        ('weight', weights_df_local),
        ('height', heights_df_local),
    )
}
df_2_local = pd.DataFrame(dict_df_local)
df_2_local

Unnamed: 0,Weight [kg],Height [cm]
Daniel,80,180
Claudia,60,160
Tim,25,125


In [64]:
# Accessing a DataFrame column by its label (bracket indexing); a single label returns a Series.
df_2_local[person_attributes_local['weight']]

Daniel     80
Claudia    60
Tim        25
Name: Weight [kg], dtype: int64

In [65]:
# Accessing DataFrame rows by index label (.loc is label-based).
df_2_local.loc['Daniel']

Weight [kg]     80
Height [cm]    180
Name: Daniel, dtype: int64

In [66]:
# Accessing DataFrame rows by integer position (.iloc is position-based; 0 is the first row).
df_2_local.iloc[0]

Weight [kg]     80
Height [cm]    180
Name: Daniel, dtype: int64

## File input / output

In [21]:
# Load the sales data from a CSV file. The path is relative, so the file must be
# reachable from the notebook's current working directory.
cd_IO = pd.read_csv('ComputerSales.csv')
# NOTE(review): this displays the whole frame; consider cd_IO.head() if the file is large.
cd_IO

Unnamed: 0,Sale ID,Contact,Sex,Age,State,Product ID,Product Type,Sale Price,Profit,Lead,Month,Year
0,1,Paul Thomas,M,43,OH,M01-F0024,Desktop,479.99,143.39,Website,January,2018
1,2,Margo Simms,F,37,WV,GT13-0024,Desktop,1249.99,230.89,Flyer 4,January,2018
2,3,Sam Stine,M,26,PA,I3670,Desktop,649.99,118.64,Website,February,2018
3,4,Moe Eggert,M,35,PA,I3593,Laptop,399.99,72.09,Website,March,2018
4,5,Jessica Elk,F,55,PA,15M-ED,Laptop,699.99,98.09,Flyer 4,March,2018
5,6,Sally Struthers,F,45,PA,GT13-0024,Desktop,1249.99,230.89,Flyer 2,April,2018
6,7,Michelle Samms,F,46,OH,GA401IV,Laptop,1349.99,180.34,Email,May,2018
7,8,Mick Roberts,M,23,OH,MY2J2LL,Tablet,999.99,146.69,Website,July,2018
8,9,Ed Klondike,M,52,OH,81TC00,Laptop,649.99,122.34,Email,July,2018
9,10,Phil Jones,M,56,WV,M01-F0024,Desktop,479.99,143.39,Flyer 2,August,2018
