<div class="alert alert-info">
  <h1>Session 5: Data Analysis with Pandas</h1>
</div>

In [42]:
import numpy as np
import pandas as pd

## Pandas Series

In [8]:
pd.Series([4,6,10,35])

0     4
1     6
2    10
3    35
dtype: int64

In [9]:
pd.Series(['a',6,10,'abcd'])

0       a
1       6
2      10
3    abcd
dtype: object

In [10]:
pd.Series(['a',6,10,'abcd'], ['A', 'B', 'C', 'D'])

A       a
B       6
C      10
D    abcd
dtype: object

In [11]:
my_series = pd.Series(['a',6,10,'abcd'], ['A', 'B', 'C', 'D'])

In [12]:
my_series['A']

'a'

In [13]:
my_series['D']

'abcd'

In [14]:
# Positional access on a label-indexed Series: use .iloc explicitly.
# Plain [] with an integer key on a non-integer index is a deprecated fallback.
my_series.iloc[0]

'a'

In [15]:
# Fourth element by position; .iloc avoids the deprecated integer fallback of [].
my_series.iloc[3]

'abcd'

IndexError: index 4 is out of bounds for axis 0 with size 4

KeyError: 'F'

### Creating Series

### `pd.Series(data=list, index=labels)`

In [19]:
# Sample inputs for the Series constructor: index labels, then the same three
# values as a plain list, as a NumPy array, and as a dict keyed by the labels.
labels = list('abc')
my_list = [10, 20, 30]
arr = np.array(my_list)
d = dict(zip(labels, my_list))

In [20]:
pd.Series(data = my_list, index = labels)

a    10
b    20
c    30
dtype: int64

In [21]:
pd.Series(data = arr)

0    10
1    20
2    30
dtype: int32

In [22]:
pd.Series(data = arr, index = labels)

a    10
b    20
c    30
dtype: int32

In [26]:
pd.Series(data = d)

a    10
b    20
c    30
dtype: int64

In [27]:
pd.Series(data = d, index = [1,2,3])

1   NaN
2   NaN
3   NaN
dtype: float64

In [30]:
sells = pd.Series([1000,2000,2500,1500],index = ['SAT','SUN','MON','TUE'])

In [31]:
sells

SAT    1000
SUN    2000
MON    2500
TUE    1500
dtype: int64

In [32]:
sells['SUN']

2000

In [41]:
# First entry by position; the index holds day labels, so use .iloc rather than [0].
sells.iloc[0]

1000

## <font color = red> Working with Series </font>

In [33]:
sells1 = pd.Series([1000,2000,2500,1500],index = ['SAT','SUN','MON','TUE'])

In [36]:
sells2 = pd.Series([500,1100,2000,3500],index = ['SAT','SUN','THU', 'FRI'])

In [37]:
sells1 + sells2

FRI       NaN
MON       NaN
SAT    1500.0
SUN    3100.0
THU       NaN
TUE       NaN
dtype: float64

In [38]:
sells1 - sells2

FRI      NaN
MON      NaN
SAT    500.0
SUN    900.0
THU      NaN
TUE      NaN
dtype: float64

In [39]:
sells1 * sells2

FRI          NaN
MON          NaN
SAT     500000.0
SUN    2200000.0
THU          NaN
TUE          NaN
dtype: float64

In [40]:
sells1 / sells2

FRI         NaN
MON         NaN
SAT    2.000000
SUN    1.818182
THU         NaN
TUE         NaN
dtype: float64

## Pandas DataFrame

In [52]:
from numpy import random 


### Creating DataFrames

In [227]:
# Seed NumPy's global generator so the random values are the same on every run.
# np.random is used directly (np is imported at the top of the notebook), which
# matches the other np.random calls in this notebook.
np.random.seed(102)
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=['A', 'B', 'C', 'D', 'E', 'F'],
    columns=['X', 'Y', 'Z', 'W'],
)

In [228]:
df

Unnamed: 0,X,Y,Z,W
A,1.668068,0.925862,1.057997,-0.920339
B,1.299748,0.331183,-0.509845,-0.903099
C,-0.130016,-2.238203,0.973165,-0.024185
D,-0.484928,-1.109264,-0.558975,1.042387
E,-1.712263,0.13612,-0.464444,0.05098
F,1.447899,0.593138,-0.755616,-0.627166


### Getting info about a DataFrame

In [229]:
df.columns

Index(['X', 'Y', 'Z', 'W'], dtype='object')

In [230]:
df.index

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [234]:
df.describe()

Unnamed: 0,X,Y,Z,W
count,6.0,6.0,6.0,6.0
mean,0.348085,-0.226861,-0.042953,-0.230237
std,1.343513,1.205692,0.826365,0.752017
min,-1.712263,-2.238203,-0.755616,-0.920339
25%,-0.3962,-0.797918,-0.546692,-0.834116
50%,0.584866,0.233651,-0.487144,-0.325675
75%,1.410861,0.52765,0.613763,0.032189
max,1.668068,0.925862,1.057997,1.042387


In [235]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, A to F
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       6 non-null      float64
 1   Y       6 non-null      float64
 2   Z       6 non-null      float64
 3   W       6 non-null      float64
dtypes: float64(4)
memory usage: 240.0+ bytes


### DataFrames Indexing

In [57]:
df['X']

A    0.884035
B    0.728750
C    0.997190
D   -1.867169
E    1.172287
F    1.553394
Name: X, dtype: float64

In [58]:
type(df['X'])

pandas.core.series.Series

In [60]:
df[['X','Y']]

Unnamed: 0,X,Y
A,0.884035,0.162718
B,0.72875,0.02914
C,0.99719,3.428563
D,-1.867169,1.443434
E,1.172287,-1.571128
F,1.553394,0.70122


In [61]:
type(df[['X','Y']])

pandas.core.frame.DataFrame

In [127]:
df[['Y', 'W', 'X', 'Z']]

Unnamed: 0,Y,W,X,Z
A,0.925862,-0.920339,1.668068,1.057997
B,0.331183,-0.903099,1.299748,-0.509845
C,-2.238203,-0.024185,-0.130016,0.973165
D,-1.109264,1.042387,-0.484928,-0.558975
E,0.13612,0.05098,-1.712263,-0.464444
F,0.593138,-0.627166,1.447899,-0.755616


In [124]:
df.loc['A']

X    1.668068
Y    0.925862
Z    1.057997
W   -0.920339
Name: A, dtype: float64

In [66]:
type(df.loc['A'])

pandas.core.series.Series

In [125]:
df.loc[['A','D']]

Unnamed: 0,X,Y,Z,W
A,1.668068,0.925862,1.057997,-0.920339
D,-0.484928,-1.109264,-0.558975,1.042387


In [126]:
# iloc selects by integer position: 0 is the first row of df.
df.iloc[0]

X    1.668068
Y    0.925862
Z    1.057997
W   -0.920339
Name: A, dtype: float64

In [128]:
df.iloc[3]

X   -0.484928
Y   -1.109264
Z   -0.558975
W    1.042387
Name: D, dtype: float64

In [130]:
df

Unnamed: 0,X,Y,Z,W
A,1.668068,0.925862,1.057997,-0.920339
B,1.299748,0.331183,-0.509845,-0.903099
C,-0.130016,-2.238203,0.973165,-0.024185
D,-0.484928,-1.109264,-0.558975,1.042387
E,-1.712263,0.13612,-0.464444,0.05098
F,1.447899,0.593138,-0.755616,-0.627166


### Accessing the entries

In [131]:
df.iloc[0,3]

-0.9203390137491348

In [132]:
df.loc['A', 'W']

-0.9203390137491348

In [133]:
df['W']['A']

-0.9203390137491348

### Subframes

In [134]:
df.loc[['A','B'],['X','Y']]

Unnamed: 0,X,Y
A,1.668068,0.925862
B,1.299748,0.331183


In [135]:
df.loc[['A','D'],['X','W']]

Unnamed: 0,X,W
A,1.668068,-0.920339
D,-0.484928,1.042387


In [136]:
df.iloc[[0,3], [0,2]]

Unnamed: 0,X,Z
A,1.668068,1.057997
D,-0.484928,-0.558975


In [139]:
# 5x5 grid holding the integers 1..25 in row-major order.
mat = np.arange(start=1, stop=26).reshape((5, 5))

In [138]:
mat

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [140]:
mat

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [141]:
df2 = pd.DataFrame(mat)

### DataFrames with integer labels

In [142]:
df2

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,5
1,6,7,8,9,10
2,11,12,13,14,15
3,16,17,18,19,20
4,21,22,23,24,25


In [144]:
df2.loc[[0,2,4],[0,2,4]]

Unnamed: 0,0,2,4
0,1,3,5
2,11,13,15
4,21,23,25


In [145]:
df2[[0,2]]

Unnamed: 0,0,2
0,1,3
1,6,8
2,11,13
3,16,18
4,21,23


In [147]:
df2.loc[[0,2]]

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,5
2,11,12,13,14,15


In [149]:
df2.loc[[3,3]]

Unnamed: 0,0,1,2,3,4
3,16,17,18,19,20
3,16,17,18,19,20


In [150]:
df2.loc[3,3]

19

# Data Preparation
## <font color = red> Modifying DataFrames </font>

In [290]:
# Seed NumPy's global generator so the sales figures are reproducible.
# np.random is used directly, consistent with the other np.random calls in this
# notebook, instead of the separately imported `random` alias.
np.random.seed(102)
df = pd.DataFrame(
    np.random.randint(1000, 2000, (6, 4)),  # integers in [1000, 2000)
    index=['Tehran', 'Shiraz', 'Yazd', 'Tabriz', 'Mashhad', 'Kerman'],
    columns=['Laptop', 'Celphone', 'Camera', 'TV'],
)

In [291]:
df

Unnamed: 0,Laptop,Celphone,Camera,TV
Tehran,1256,1755,1590,1242
Shiraz,1626,1879,1834,1969
Yazd,1968,1045,1328,1073
Tabriz,1079,1903,1772,1759
Mashhad,1624,1678,1466,1439
Kerman,1387,1992,1128,1208


#### Creating a new column

In [105]:
# Add a 'SUM' column holding the element-wise total of the four product columns.
df['SUM'] = df['Laptop']+ df['Celphone'] + df['Camera'] + df['TV']

In [106]:
df

Unnamed: 0,Laptop,Celphone,Camera,TV,SUM
Tehran,1256,1755,1590,1242,5843
Shiraz,1626,1879,1834,1969,7308
Yazd,1968,1045,1328,1073,5414
Tabriz,1079,1903,1772,1759,6513
Mashhad,1624,1678,1466,1439,6207
Kerman,1387,1992,1128,1208,5715


In [174]:
df['Book'] = np.random.randint(100,2000,6)
df

Unnamed: 0,Laptop,Celphone,Camera,TV,Book
Tehran,1256,1755,1590,1242,1065
Shiraz,1626,1879,1834,1969,204
Yazd,1968,1045,1328,1073,1803
Tabriz,1079,1903,1772,1759,875
Mashhad,1624,1678,1466,1439,1776
Kerman,1387,1992,1128,1208,398


#### Removing columns or rows

KeyError: 0

In [101]:
df.drop(['TV'], axis = 1)  # column

Unnamed: 0,Laptop,Celphone,Camera,SUM
Tehran,1256,1755,1590,5843
Shiraz,1626,1879,1834,7308
Yazd,1968,1045,1328,5414
Tabriz,1079,1903,1772,6513
Mashhad,1624,1678,1466,6207
Kerman,1387,1992,1128,5715


In [109]:
df

Unnamed: 0,Laptop,Celphone,Camera,TV,SUM
Tehran,1256,1755,1590,1242,5843
Shiraz,1626,1879,1834,1969,7308
Yazd,1968,1045,1328,1073,5414
Tabriz,1079,1903,1772,1759,6513
Mashhad,1624,1678,1466,1439,6207
Kerman,1387,1992,1128,1208,5715


In [107]:
df.drop(columns = ['TV'])  # column

Unnamed: 0,Laptop,Celphone,Camera,SUM
Tehran,1256,1755,1590,5843
Shiraz,1626,1879,1834,7308
Yazd,1968,1045,1328,5414
Tabriz,1079,1903,1772,6513
Mashhad,1624,1678,1466,6207
Kerman,1387,1992,1128,5715


In [110]:
df.drop(columns = ['TV'], inplace = True)  # permanent: modifies df itself instead of returning a new frame

In [111]:
df

Unnamed: 0,Laptop,Celphone,Camera,SUM
Tehran,1256,1755,1590,5843
Shiraz,1626,1879,1834,7308
Yazd,1968,1045,1328,5414
Tabriz,1079,1903,1772,6513
Mashhad,1624,1678,1466,6207
Kerman,1387,1992,1128,5715


In [112]:
df.drop(['Yazd'], axis = 0) # row

Unnamed: 0,Laptop,Celphone,Camera,SUM
Tehran,1256,1755,1590,5843
Shiraz,1626,1879,1834,7308
Tabriz,1079,1903,1772,6513
Mashhad,1624,1678,1466,6207
Kerman,1387,1992,1128,5715


In [114]:
df.drop(index = ['Yazd']) # row

Unnamed: 0,Laptop,Celphone,Camera,SUM
Tehran,1256,1755,1590,5843
Shiraz,1626,1879,1834,7308
Tabriz,1079,1903,1772,6513
Mashhad,1624,1678,1466,6207
Kerman,1387,1992,1128,5715


In [115]:
df

Unnamed: 0,Laptop,Celphone,Camera,SUM
Tehran,1256,1755,1590,5843
Shiraz,1626,1879,1834,7308
Yazd,1968,1045,1328,5414
Tabriz,1079,1903,1772,6513
Mashhad,1624,1678,1466,6207
Kerman,1387,1992,1128,5715


### Permanently Removing a Column

In [221]:
# Build the small product table used in this example so the cell runs on a
# fresh kernel; the df2 created earlier from `mat` has no 'Price' column.
df2 = pd.DataFrame({
    'ID': [1, 2, 3, 4, 5, 6],
    'Model': ['M1', 'M2', 'M2', 'M3', 'M1', 'M1'],
    'Price': [100, 200, 250, 300, 150, 120],  # illustrative values
})
# `del` removes the column from df2 itself; nothing is returned.
del df2['Price']

In [222]:
df2

Unnamed: 0,ID,Model
0,1,M1
1,2,M2
2,3,M2
3,4,M3
4,5,M1
5,6,M1


### Renaming columns

In [294]:
df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [295]:
df.rename(columns = {'A': 'aa', 'B':'bb'})

Unnamed: 0,aa,bb
0,1,4
1,2,5
2,3,6


In [296]:
df.rename(index = {0: 100, 1:200, 2:300})

Unnamed: 0,A,B
100,1,4
200,2,5
300,3,6


In [300]:
df.index.names

FrozenList([None])

In [303]:
df.index.name = 'Transactions'
df

Unnamed: 0_level_0,A,B
Transactions,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


In [306]:
df.columns.name = 'kind'
df

kind,A,B
Transactions,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1,4
1,2,5
2,3,6


### Conditional Selection (Query)

In [153]:
# Recreate the city sales table with a fixed seed so the query results are
# reproducible. np.random is used directly, consistent with the other
# np.random calls in this notebook.
np.random.seed(102)
df = pd.DataFrame(
    np.random.randint(1000, 2000, (6, 4)),  # integers in [1000, 2000)
    index=['Tehran', 'Shiraz', 'Yazd', 'Tabriz', 'Mashhad', 'Kerman'],
    columns=['Laptop', 'Celphone', 'Camera', 'TV'],
)

In [154]:
df

Unnamed: 0,Laptop,Celphone,Camera,TV
Tehran,1256,1755,1590,1242
Shiraz,1626,1879,1834,1969
Yazd,1968,1045,1328,1073
Tabriz,1079,1903,1772,1759
Mashhad,1624,1678,1466,1439
Kerman,1387,1992,1128,1208


In [155]:
df>1500

Unnamed: 0,Laptop,Celphone,Camera,TV
Tehran,False,True,True,False
Shiraz,True,True,True,True
Yazd,True,False,False,False
Tabriz,False,True,True,True
Mashhad,True,True,False,False
Kerman,False,True,False,False


In [156]:
df[df>1500]

Unnamed: 0,Laptop,Celphone,Camera,TV
Tehran,,1755.0,1590.0,
Shiraz,1626.0,1879.0,1834.0,1969.0
Yazd,1968.0,,,
Tabriz,,1903.0,1772.0,1759.0
Mashhad,1624.0,1678.0,,
Kerman,,1992.0,,


In [157]:
df[df['Celphone']>1500]

Unnamed: 0,Laptop,Celphone,Camera,TV
Tehran,1256,1755,1590,1242
Shiraz,1626,1879,1834,1969
Tabriz,1079,1903,1772,1759
Mashhad,1624,1678,1466,1439
Kerman,1387,1992,1128,1208


In [158]:
df['Celphone']>1500

Tehran      True
Shiraz      True
Yazd       False
Tabriz      True
Mashhad     True
Kerman      True
Name: Celphone, dtype: bool

In [161]:
df[df['Celphone']>1500][['Camera', 'TV']]

Unnamed: 0,Camera,TV
Tehran,1590,1242
Shiraz,1834,1969
Tabriz,1772,1759
Mashhad,1466,1439
Kerman,1128,1208


In [163]:
df[df['Celphone']>1500].loc['Tabriz', 'TV']

1759

In [164]:
df

Unnamed: 0,Laptop,Celphone,Camera,TV
Tehran,1256,1755,1590,1242
Shiraz,1626,1879,1834,1969
Yazd,1968,1045,1328,1073
Tabriz,1079,1903,1772,1759
Mashhad,1624,1678,1466,1439
Kerman,1387,1992,1128,1208


In [166]:
# Combining conditions: & keeps only the rows where both comparisons are True.
# Each comparison is wrapped in its own parentheses.
df[(df['Celphone']>1200) & (df['TV']<1800)]

Unnamed: 0,Laptop,Celphone,Camera,TV
Tehran,1256,1755,1590,1242
Tabriz,1079,1903,1772,1759
Mashhad,1624,1678,1466,1439
Kerman,1387,1992,1128,1208


In [168]:
df[(df['Laptop']<1200) | (df['TV']>1500)]

Unnamed: 0,Laptop,Celphone,Camera,TV
Shiraz,1626,1879,1834,1969
Tabriz,1079,1903,1772,1759


### Working with index

In [175]:
df

Unnamed: 0,Laptop,Celphone,Camera,TV,Book
Tehran,1256,1755,1590,1242,1065
Shiraz,1626,1879,1834,1969,204
Yazd,1968,1045,1328,1073,1803
Tabriz,1079,1903,1772,1759,875
Mashhad,1624,1678,1466,1439,1776
Kerman,1387,1992,1128,1208,398


In [176]:
df.reset_index()

Unnamed: 0,index,Laptop,Celphone,Camera,TV,Book
0,Tehran,1256,1755,1590,1242,1065
1,Shiraz,1626,1879,1834,1969,204
2,Yazd,1968,1045,1328,1073,1803
3,Tabriz,1079,1903,1772,1759,875
4,Mashhad,1624,1678,1466,1439,1776
5,Kerman,1387,1992,1128,1208,398


In [178]:
df

Unnamed: 0,Laptop,Celphone,Camera,TV,Book
Tehran,1256,1755,1590,1242,1065
Shiraz,1626,1879,1834,1969,204
Yazd,1968,1045,1328,1073,1803
Tabriz,1079,1903,1772,1759,875
Mashhad,1624,1678,1466,1439,1776
Kerman,1387,1992,1128,1208,398


In [179]:
df2 = df.reset_index()

In [180]:
df2

Unnamed: 0,index,Laptop,Celphone,Camera,TV,Book
0,Tehran,1256,1755,1590,1242,1065
1,Shiraz,1626,1879,1834,1969,204
2,Yazd,1968,1045,1328,1073,1803
3,Tabriz,1079,1903,1772,1759,875
4,Mashhad,1624,1678,1466,1439,1776
5,Kerman,1387,1992,1128,1208,398


In [183]:
df2.loc[0]

index       Tehran
Laptop        1256
Celphone      1755
Camera        1590
TV            1242
Book          1065
Name: 0, dtype: object

In [191]:
df2 = df.reset_index(drop = True)

In [192]:
df2

Unnamed: 0,Laptop,Celphone,Camera,TV,Book
0,1256,1755,1590,1242,1065
1,1626,1879,1834,1969,204
2,1968,1045,1328,1073,1803
3,1079,1903,1772,1759,875
4,1624,1678,1466,1439,1776
5,1387,1992,1128,1208,398


In [187]:
# df2 was just rebuilt with reset_index(drop=True), so it has no 'index' column.
# Start from a fresh reset_index(), which keeps the old labels in an 'index'
# column, and promote that column back to the row index.
df.reset_index().set_index('index')

Unnamed: 0_level_0,Laptop,Celphone,Camera,TV,Book
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Tehran,1256,1755,1590,1242,1065
Shiraz,1626,1879,1834,1969,204
Yazd,1968,1045,1328,1073,1803
Tabriz,1079,1903,1772,1759,875
Mashhad,1624,1678,1466,1439,1776
Kerman,1387,1992,1128,1208,398


In [188]:
# df2 was just rebuilt with reset_index(drop=True), so it has no 'index' column.
# Start from a fresh reset_index() instead; drop=False keeps 'index' as a
# regular column in addition to using it as the row index.
df.reset_index().set_index('index', drop=False)

Unnamed: 0_level_0,index,Laptop,Celphone,Camera,TV,Book
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Tehran,Tehran,1256,1755,1590,1242,1065
Shiraz,Shiraz,1626,1879,1834,1969,204
Yazd,Yazd,1968,1045,1328,1073,1803
Tabriz,Tabriz,1079,1903,1772,1759,875
Mashhad,Mashhad,1624,1678,1466,1439,1776
Kerman,Kerman,1387,1992,1128,1208,398


In [196]:
s = pd.Series([1, 2, 3, 4,5,6])

In [197]:
s

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [198]:
df2.set_index(s*2)

Unnamed: 0,Laptop,Celphone,Camera,TV,Book
2,1256,1755,1590,1242,1065
4,1626,1879,1834,1969,204
6,1968,1045,1328,1073,1803
8,1079,1903,1772,1759,875
10,1624,1678,1466,1439,1776
12,1387,1992,1128,1208,398


In [204]:
df2

Unnamed: 0,Laptop,Celphone,Camera,TV,Book
0,1256,1755,1590,1242,1065
1,1626,1879,1834,1969,204
2,1968,1045,1328,1073,1803
3,1079,1903,1772,1759,875
4,1624,1678,1466,1439,1776
5,1387,1992,1128,1208,398


In [200]:
#reindex()

In [211]:
# Browser request log used for the reindex() examples.
# The stray chained assignment (`df3 = f = ...`) is removed so no unused alias
# is left in the namespace.
index = ['Firefox', 'Chrome', 'Safari', 'IE11', 'Konqueror']
df3 = pd.DataFrame(
    {
        'http_status': [200, 200, 404, 404, 301],
        'response_time': [0.04, 0.02, 0.07, 0.08, 1.0],
    },
    index=index,
)

In [212]:
df3

Unnamed: 0,http_status,response_time
Firefox,200,0.04
Chrome,200,0.02
Safari,404,0.07
IE11,404,0.08
Konqueror,301,1.0


In [213]:
new_index = ['Safari', 'Iceweasel', 'Edge', 'IE11', 'Chrome']

In [214]:
df3.reindex(new_index)

Unnamed: 0,http_status,response_time
Safari,404.0,0.07
Iceweasel,,
Edge,,
IE11,404.0,0.08
Chrome,200.0,0.02


In [215]:
df3.reindex(new_index,fill_value=0)

Unnamed: 0,http_status,response_time
Safari,404,0.07
Iceweasel,0,0.0
Edge,0,0.0
IE11,404,0.08
Chrome,200,0.02


In [222]:
df3.index

Index(['Firefox', 'Chrome', 'Safari', 'IE11', 'Konqueror'], dtype='object')

In [223]:
df3.index.name = 'browsers'

In [224]:
df3

Unnamed: 0_level_0,http_status,response_time
browsers,Unnamed: 1_level_1,Unnamed: 2_level_1
Firefox,200,0.04
Chrome,200,0.02
Safari,404,0.07
IE11,404,0.08
Konqueror,301,1.0


In [225]:
df3.columns

Index(['http_status', 'response_time'], dtype='object')

In [237]:
df3.columns.name = 'features'

In [238]:
df3

features,http_status,response_time
browsers,Unnamed: 1_level_1,Unnamed: 2_level_1
Firefox,200,0.04
Chrome,200,0.02
Safari,404,0.07
IE11,404,0.08
Konqueror,301,1.0


## Multi Index 

In [249]:
Cities = ['Tehran','Tehran','Tehran','Shiraz','Shiraz','Shiraz']
Goods = ['A','B','C','D','E','F']

In [250]:
index = list(zip(Cities,Goods))

In [251]:
index

[('Tehran', 'A'),
 ('Tehran', 'B'),
 ('Tehran', 'C'),
 ('Shiraz', 'D'),
 ('Shiraz', 'E'),
 ('Shiraz', 'F')]

In [252]:
multi_index = pd.MultiIndex.from_tuples(index)

In [253]:
multi_index

MultiIndex([('Tehran', 'A'),
            ('Tehran', 'B'),
            ('Tehran', 'C'),
            ('Shiraz', 'D'),
            ('Shiraz', 'E'),
            ('Shiraz', 'F')],
           )

In [254]:
type(multi_index)

pandas.core.indexes.multi.MultiIndex

In [256]:
df = pd.DataFrame(np.random.randint(1000,3000, (6,3)),index=multi_index,columns=['Celphone','Laptop', 'Camera'])

In [257]:
df

Unnamed: 0,Unnamed: 1,Celphone,Laptop,Camera
Tehran,A,2486,2404,1101
Tehran,B,1998,1076,2496
Tehran,C,2670,1507,2219
Shiraz,D,2883,2578,2783
Shiraz,E,2218,1753,1947
Shiraz,F,1694,1704,2677


In [258]:
df.loc['Tehran']

Unnamed: 0,Celphone,Laptop,Camera
A,2486,2404,1101
B,1998,1076,2496
C,2670,1507,2219


In [259]:
df.loc['Tehran'].loc['A']

Celphone    2486
Laptop      2404
Camera      1101
Name: A, dtype: int32

#### Cross Section

In [274]:
df.loc['Tehran'].loc['B']

Goods
Celphone    1998
Laptop      1076
Camera      2496
Name: B, dtype: int32

In [275]:
df.xs(('Tehran','B'))

Goods
Celphone    1998
Laptop      1076
Camera      2496
Name: (Tehran, B), dtype: int32

In [260]:
df.loc['Tehran']['Camera']

A    1101
B    2496
C    2219
Name: Camera, dtype: int32

In [288]:
df.xs('B', level =1)

Goods,Celphone,Laptop,Camera
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tehran,1998,1076,2496


In [289]:
df.xs('Shiraz', level =0)

Goods,Celphone,Laptop,Camera
Store,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D,2883,2578,2783
E,2218,1753,1947
F,1694,1704,2677


In [261]:
df.index

MultiIndex([('Tehran', 'A'),
            ('Tehran', 'B'),
            ('Tehran', 'C'),
            ('Shiraz', 'D'),
            ('Shiraz', 'E'),
            ('Shiraz', 'F')],
           )

In [263]:
df.index.names=['City', 'Store']

In [264]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Celphone,Laptop,Camera
City,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tehran,A,2486,2404,1101
Tehran,B,1998,1076,2496
Tehran,C,2670,1507,2219
Shiraz,D,2883,2578,2783
Shiraz,E,2218,1753,1947
Shiraz,F,1694,1704,2677


In [280]:
df.columns.name = 'Goods'

In [284]:
df

Unnamed: 0_level_0,Goods,Celphone,Laptop,Camera
City,Store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Tehran,A,2486,2404,1101
Tehran,B,1998,1076,2496
Tehran,C,2670,1507,2219
Shiraz,D,2883,2578,2783
Shiraz,E,2218,1753,1947
Shiraz,F,1694,1704,2677
