## Mapping in DataFrame

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Assemble a five-row table with one 'country' column and one 'GDP' column.
countries = ['USA', 'China', 'Nigeria', 'Ghana', 'Japan']
gdp_values = [19, 12, 45, 40, 4]
df = pd.DataFrame({'country': countries, 'GDP': gdp_values})
print(df)

   country  GDP
0      USA   19
1    China   12
2  Nigeria   45
3    Ghana   40
4    Japan    4


In [6]:
# Lookup table keyed by country name; used to derive a column from df['country'].
population_map = {
    'USA': 325,
    'China': 1421,
    'Nigeria': 1200,
    'Ghana': 8675,
    'Japan': 127,
}
# Fixed: the original printed the misspelt name `populaion_map`, which raises
# NameError when the notebook is run on a fresh kernel.
print(population_map)

{'USA': 325, 'China': 1421, 'Nigeria': 1200, 'Ghana': 8675, 'Japan': 127}


In [7]:
# Translate each country name through population_map and store the result
# as a new 'population' column on df.
population_by_country = df['country'].map(population_map)
df['population'] = population_by_country

In [8]:
# Show the frame again to confirm the 'population' column is now present.
print(df)

   country  GDP  population
0      USA   19         325
1    China   12        1421
2  Nigeria   45        1200
3    Ghana   40        8675
4    Japan    4         127


## Replacing Values in Series

In [9]:
# Sample integer Series with repeated values, used by the replace() examples.
sample_values = [10, 20, 40, 50, 20, 10, 50, 40]
s1 = pd.Series(sample_values)
print(s1)

0    10
1    20
2    40
3    50
4    20
5    10
6    50
7    40
dtype: int64


In [10]:
# Replace a single value: every 10 becomes NaN. Returns a new Series; s1 itself is unchanged.
s1.replace(to_replace=10, value=np.nan)

0     NaN
1    20.0
2    40.0
3    50.0
4    20.0
5     NaN
6    50.0
7    40.0
dtype: float64

In [12]:
# Replace several values at once using parallel lists: 10 -> 100, 20 -> NaN, 40 -> 80.
s1.replace(to_replace=[10, 20, 40], value=[100, np.nan, 80])

0    100.0
1      NaN
2     80.0
3     50.0
4      NaN
5    100.0
6     50.0
7     80.0
dtype: float64

In [13]:
# Replace several values at once using an old -> new dictionary.
replacements = {10: 100, 20: np.nan, 40: 5}
s1.replace(replacements)

0    100.0
1      NaN
2      5.0
3     50.0
4      NaN
5    100.0
6     50.0
7      5.0
dtype: float64

## Renaming Indexes in DataFrame

In [14]:
# Build a 3 x 2 frame holding the integers 0..5, with car makes as row labels
# and two score names as column labels.
car_labels = ['AUDI', 'MERCEDEZ', 'TOYOTA']
score_labels = ['PERFORMANCE', 'SAFETY']
scores = np.arange(6).reshape(3, 2)
df = pd.DataFrame(scores, index=car_labels, columns=score_labels)
print(df)

          PERFORMANCE  SAFETY
AUDI                0       1
MERCEDEZ            2       3
TOYOTA              4       5


In [15]:
# Rename the row labels by mapping str.lower over the index, then assign the
# lower-cased labels back onto df.
lowercase_labels = df.index.map(str.lower)
df.index = lowercase_labels
print(df)

          PERFORMANCE  SAFETY
audi                0       1
mercedez            2       3
toyota              4       5


In [16]:
# rename() also accepts callables: title-case the row labels and lower-case
# the column labels. The result is a new frame; df is not modified.
df.rename(str.title, axis='index').rename(str.lower, axis='columns')

Unnamed: 0,performance,safety
Audi,0,1
Mercedez,2,3
Toyota,4,5


In [18]:
# rename() with old -> new dictionaries changes only the listed labels.
# The result is a new frame; df is not modified.
index_renames = {'audi': 'BMW'}
column_renames = {'SAFETY': 'SAFETY_TEST'}
df.rename(index=index_renames, columns=column_renames)

Unnamed: 0,PERFORMANCE,SAFETY_TEST
BMW,0,1
mercedez,2,3
toyota,4,5


In [19]:
# Save the renamed frame by rebinding df to the result of rename().
# This replaces inplace=True, which offers no benefit here and prevents
# method chaining; df ends up with the same renamed labels either way.
df = df.rename(index={'audi': 'BMW'}, columns={'SAFETY': 'SAFETY_TEST'})

In [20]:
# Display df to confirm the rename from the previous cell was saved.
df

Unnamed: 0,PERFORMANCE,SAFETY_TEST
BMW,0,1
mercedez,2,3
toyota,4,5


## Observation, Filtering and Basic Analysis

In [21]:
# Draw a 500 x 15 table of standard-normal samples from a seeded generator so
# that Restart & Run All reproduces the same values on every run (the original
# used the unseeded global np.random state).
rng = np.random.default_rng(42)
sf = pd.DataFrame(rng.standard_normal((500, 15)))
print(sf)

            0         1         2         3         4         5         6  \
0   -1.172448  0.099838  0.181067  0.564849 -1.656076 -0.211645  1.024982   
1    0.292094 -2.023148 -1.461970  1.155323  0.827056 -0.527078 -0.009753   
2   -0.227213  0.412330  3.113138  1.172147  0.690827  1.026349 -0.446573   
3    0.682457 -0.408647  1.697106  0.613023 -0.168163  0.442301  1.005778   
4    1.515870  0.184545  0.071818  1.561611 -1.057839  3.376868 -0.932389   
..        ...       ...       ...       ...       ...       ...       ...   
495 -0.558746 -0.136744  1.333167 -0.357775  1.069702  0.387742 -0.395702   
496  0.483703  1.297169 -0.371555 -0.534383  0.736265  0.678770 -0.534296   
497  0.064542  0.022378  0.150757 -0.190905 -1.635441  0.180622  1.220998   
498 -0.292934 -1.536518 -0.518052  0.825119  0.639694 -0.444779 -0.030799   
499 -0.014370 -0.053699 -0.756568  1.747685  0.548369  0.536716 -0.006499   

            7         8         9        10        11        12        13  

In [22]:
# Preview the first five rows of sf.
sf.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,-1.172448,0.099838,0.181067,0.564849,-1.656076,-0.211645,1.024982,2.347366,-0.199224,0.718426,-0.636608,1.048696,-1.769344,-1.275757,-1.230106
1,0.292094,-2.023148,-1.46197,1.155323,0.827056,-0.527078,-0.009753,0.663447,0.864869,-0.117899,-2.109856,-0.316312,0.68912,0.382655,0.192703
2,-0.227213,0.41233,3.113138,1.172147,0.690827,1.026349,-0.446573,0.101232,-0.834067,-0.699639,-1.431328,1.663321,-0.373343,-1.113498,1.087825
3,0.682457,-0.408647,1.697106,0.613023,-0.168163,0.442301,1.005778,1.052443,2.345013,-0.822992,0.976844,0.840532,0.319822,1.125615,-0.665188
4,1.51587,0.184545,0.071818,1.561611,-1.057839,3.376868,-0.932389,0.184844,1.449025,-0.001372,-0.706906,-0.128929,-0.537589,-1.868327,-1.934202


In [23]:
# Preview the last five rows of sf.
sf.tail(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
495,-0.558746,-0.136744,1.333167,-0.357775,1.069702,0.387742,-0.395702,0.098728,-2.222879,-0.627499,-0.156468,-0.510173,1.087648,2.077701,1.241741
496,0.483703,1.297169,-0.371555,-0.534383,0.736265,0.67877,-0.534296,-1.170976,-0.280879,-0.05221,-2.270639,-0.241981,-1.830061,-2.703369,0.138135
497,0.064542,0.022378,0.150757,-0.190905,-1.635441,0.180622,1.220998,0.571642,-0.647836,1.141622,0.213688,-0.425664,-0.041659,-0.16727,-0.692623
498,-0.292934,-1.536518,-0.518052,0.825119,0.639694,-0.444779,-0.030799,-0.586137,0.264001,-0.382432,-1.511416,-1.479268,-0.937196,-0.892603,0.331664
499,-0.01437,-0.053699,-0.756568,1.747685,0.548369,0.536716,-0.006499,0.462169,-0.656371,0.379333,-1.325323,-1.069886,1.100528,-1.130386,-1.514263


In [24]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
sf.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,-0.015433,-0.072714,-0.032174,-0.025373,-0.037732,0.03055,-0.008806,-0.004605,-0.062602,-0.022445,0.00847,-0.091699,-0.019849,-0.049113,0.002508
std,0.959063,1.029592,1.028893,0.974965,1.029128,0.988875,1.002396,1.006566,1.062138,0.974789,0.98696,0.948998,0.961219,1.035212,1.049923
min,-3.63558,-2.777975,-2.568318,-3.433818,-2.723063,-3.164747,-4.08332,-3.20005,-2.919987,-2.514883,-2.794734,-2.791021,-2.966028,-3.59127,-3.335489
25%,-0.640026,-0.768801,-0.789046,-0.676316,-0.69776,-0.60768,-0.639426,-0.692666,-0.727036,-0.726271,-0.672399,-0.762732,-0.68856,-0.805513,-0.691955
50%,-0.029495,-0.046384,-0.081629,-0.013786,-0.031928,0.021838,-0.034885,0.009564,-0.039143,-0.009291,-0.019734,-0.12456,-0.017751,-0.095795,0.006263
75%,0.670434,0.686673,0.656235,0.630744,0.647882,0.665722,0.648963,0.663632,0.639731,0.617955,0.681812,0.548876,0.663403,0.653331,0.698257
max,2.850001,2.665711,4.094569,3.126341,3.083438,3.376868,3.460779,2.737832,4.038371,2.703832,3.146131,2.834542,2.871876,2.905753,3.409796


In [25]:
# Pull out the column labelled 0 as a Series and preview its first rows.
column1 = sf.loc[:, 0]
print(column1.head())

0   -1.172448
1    0.292094
2   -0.227213
3    0.682457
4    1.515870
Name: 0, dtype: float64


In [28]:
# Keep only the entries of column1 whose magnitude is greater than 2.5.
is_outlier = column1.abs() > 2.5
column1[is_outlier]

8      2.850001
126   -3.635580
315   -2.596071
373   -2.837295
431   -2.668993
Name: 0, dtype: float64

In [29]:
# Keep every row that has at least one entry with magnitude above 2.5.
# `axis` is passed by keyword because pandas 2.x made it keyword-only, so the
# positional form `.any(1)` raises TypeError there.
sf[(np.abs(sf) > 2.5).any(axis=1)]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
2,-0.227213,0.412330,3.113138,1.172147,0.690827,1.026349,-0.446573,0.101232,-0.834067,-0.699639,-1.431328,1.663321,-0.373343,-1.113498,1.087825
4,1.515870,0.184545,0.071818,1.561611,-1.057839,3.376868,-0.932389,0.184844,1.449025,-0.001372,-0.706906,-0.128929,-0.537589,-1.868327,-1.934202
8,2.850001,-0.627405,-0.804870,-1.664825,-0.829341,0.098320,1.729049,-1.925712,-1.598976,0.608591,0.446860,1.612647,1.045053,-0.833156,0.524035
13,-0.620083,-0.462748,-0.106286,-0.100635,0.935154,1.754127,1.600170,0.762451,0.639181,1.732894,-0.358780,1.391085,-0.162976,-0.174933,3.003531
14,-0.948927,-0.090299,-2.188996,1.120945,-0.711530,-1.185999,-1.674406,-0.068342,-2.856755,-1.039538,1.509921,-0.107192,0.749949,-0.872398,-0.711347
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
482,-0.066448,0.167904,-1.207611,0.169587,-1.355780,-0.744299,1.760613,1.518504,-2.113408,-0.303475,-1.156545,0.413030,-1.328171,0.804484,-2.873916
488,0.581943,-2.514911,-1.365720,0.157333,-0.963865,1.432690,0.632552,1.225686,0.219330,0.876991,0.133120,-0.654853,-1.208691,-0.618262,0.018591
490,-0.299960,-0.541831,-1.520478,-1.040931,0.796792,-0.986360,-0.583923,-0.246490,0.541095,1.344979,-0.671163,-1.389456,0.469673,-0.660652,2.727678
494,1.209538,0.425729,-0.497200,1.139284,-0.846956,-1.295643,-1.535936,-1.635610,0.285312,-0.695090,-0.594326,-0.241158,1.048189,-0.407013,2.522482


In [33]:
# Overwrite every entry whose magnitude exceeds 3 with +5 or -5 (sign kept),
# then show the summary statistics of the modified frame.
# NOTE(review): the replacement magnitude (5) is larger than the threshold (3),
# so this does not cap the values at the threshold - confirm this is intended.
outlier_mask = sf.abs() > 3
sf[outlier_mask] = np.sign(sf) * 5
sf.describe()

  """Entry point for launching an IPython kernel.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
count,495.0,494.0,494.0,492.0,489.0,492.0,493.0,495.0,488.0,496.0,496.0,497.0,494.0,489.0,489.0
mean,0.002366,-0.052746,-0.059114,-0.035698,-0.057175,0.018688,-0.002275,0.012642,-0.054926,-0.033723,-0.003044,-0.087253,-0.019714,-0.042761,-0.015197
std,0.917491,0.996405,0.976398,0.915159,0.956385,0.927687,0.932491,0.971096,0.972982,0.949302,0.957457,0.928493,0.918784,0.957108,0.966516
min,-2.477234,-2.484785,-2.44423,-2.274975,-2.46704,-2.399071,-2.464054,-2.386052,-2.344278,-2.258634,-2.497446,-2.361858,-2.194957,-2.494248,-2.456609
25%,-0.617855,-0.730943,-0.790585,-0.673003,-0.690325,-0.604609,-0.62651,-0.684927,-0.708787,-0.726271,-0.672399,-0.743264,-0.682845,-0.78723,-0.676387
50%,-0.017548,-0.043329,-0.100414,-0.01831,-0.047411,0.021085,-0.034638,0.01853,-0.030262,-0.010378,-0.025398,-0.123452,-0.017751,-0.090962,0.001185
75%,0.673052,0.688091,0.638636,0.626988,0.608855,0.65404,0.647303,0.663818,0.626588,0.608792,0.679161,0.547145,0.649153,0.639785,0.641687
max,2.071699,2.47642,2.470189,2.397617,2.442636,2.45436,2.201027,2.441781,2.345013,2.388723,2.368956,2.324965,2.153471,2.317808,2.437566
