### Create DataFrame

In [1]:
import pandas as pd

In [2]:
pd.DataFrame({'Col1':[1,2], 'Col2':['a','b']})

Unnamed: 0,Col1,Col2
0,1,a
1,2,b


In [3]:
#give index as well
pd.DataFrame({'Name':['Ammara','Asma'], 'Uni':['UOS','UOS']}, index=['student1','Student2'])

Unnamed: 0,Name,Uni
student1,Ammara,UOS
Student2,Asma,UOS


### Create Series

In [4]:
Name = ['Ammara','Asma']
pd.Series(Name)

0    Ammara
1      Asma
dtype: object

#### None Element in numbers will be 'NaN' and in strings will be 'None'

In [5]:
animals = ['panda','bear',None]
pd.Series(animals)

0    panda
1     bear
2     None
dtype: object

In [7]:
num = [1,2,3,None]
pd.Series(num)

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64

#### NaN and None are not equal

In [9]:
import numpy as np
np.nan == None

False

In [10]:
np.nan == np.nan

False

In [11]:
np.isnan(np.nan)

True

#### Create Series by giving your own indices

#### Option 1: 

In [13]:
sports = {'Hockey':'Pakistan',
         'Cricket':'England',
         'Sumo':'Japan',
         'Golf':'Scotland'}
s=pd.Series(sports)
s

Hockey     Pakistan
Cricket     England
Sumo          Japan
Golf       Scotland
dtype: object

In [14]:
s.index

Index(['Hockey', 'Cricket', 'Sumo', 'Golf'], dtype='object')

#### Option 2:

In [16]:
s = pd.Series(['A','A+','B'], index=['Che','Bio','Math'])
s

Che      A
Bio     A+
Math     B
dtype: object

In [17]:
scores = {'Che':'A', 'Bio':'B', 'Math':'A+', 'Phy':'A'}
pd.Series(scores, index=['Math','Phy'])

Math    A+
Phy      A
dtype: object

### Querying a Series

In [18]:
import pandas as pd
unis = {'UET':'Pakistan',
       'UOS':'UAE',
       'TUM':'Germany',
       'NYU':'USA',
       'Oxford':'UK'}
u = pd.Series(unis)
u

UET       Pakistan
UOS            UAE
TUM        Germany
NYU            USA
Oxford          UK
dtype: object

In [19]:
u[3]

'USA'

In [20]:
u.iloc[3]

'USA'

In [21]:
u.loc['UET']

'Pakistan'

In [37]:
sports = {99: 'Bhutan',
          100: 'Scotland',
          101: 'Japan',
          102: 'South Korea'}
s = pd.Series(sports)

In [38]:
s

99          Bhutan
100       Scotland
101          Japan
102    South Korea
dtype: object

In [39]:
#s[0]  it will not work because the keys/indices are also integers

In [40]:
s.keys

<bound method Series.keys of 99          Bhutan
100       Scotland
101          Japan
102    South Korea
dtype: object>

In [41]:
s.index

Int64Index([99, 100, 101, 102], dtype='int64')

In [42]:
s.iloc[0]

'Bhutan'

In [43]:
s[99]

'Bhutan'

In [44]:
s.loc[100]

'Scotland'

In [45]:
a = pd.Series([100.00, 120.00, 101.00, 3.00])
a

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [46]:
a.sum()

324.0

In [47]:
import numpy as np
np.sum(a)

324.0

In [48]:
%%timeit -n 100
a.sum()

168 µs ± 37.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [49]:
%%timeit -n 100
np.sum(a)

216 µs ± 43.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


#### Combine Series

In [3]:
import pandas as pd
s = pd.Series([1,2,3])
s.loc['Animal'] = 'Bear'
s

0            1
1            2
2            3
Animal    Bear
dtype: object

In [5]:
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'], 
                                   index=['Cricket',
                                          'Cricket',
                                          'Cricket',
                                          'Cricket'])

In [6]:
all_countries = original_sports.append(cricket_loving_countries)

In [7]:
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [9]:
all_countries.loc['Cricket']

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

### DataFrame

In [3]:
import pandas as pd
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_1

Name                 Chris
Item Purchased    Dog Food
Cost                  22.5
dtype: object

In [4]:
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})

In [5]:
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [6]:
df.loc['Store 1']

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5


In [7]:
type(df.loc['Store 1'])

pandas.core.frame.DataFrame

In [8]:
type(df)

pandas.core.frame.DataFrame

In [9]:
df.loc['Store 1','Name']

Store 1    Chris
Store 1    Kevyn
Name: Name, dtype: object

In [10]:
df.T

Unnamed: 0,Store 1,Store 1.1,Store 2
Name,Chris,Kevyn,Vinod
Item Purchased,Dog Food,Kitty Litter,Bird Seed
Cost,22.5,2.5,5


In [11]:
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [12]:
df.T.loc['Cost']

Store 1    22.5
Store 1     2.5
Store 2       5
Name: Cost, dtype: object

In [13]:
df['Cost']

Store 1    22.5
Store 1     2.5
Store 2     5.0
Name: Cost, dtype: float64

In [14]:
df.loc['Store 1','Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

In [15]:
df.loc[:,['Name','Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5
Store 2,Vinod,5.0


In [16]:
df.drop('Store 1')

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Vinod,Bird Seed,5.0


In [17]:
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [18]:
df1 = df.copy()
df1

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [19]:
df1 = df1.drop('Store 1')
df1

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Vinod,Bird Seed,5.0


In [20]:
del df1['Name']

In [21]:
df1

Unnamed: 0,Item Purchased,Cost
Store 2,Bird Seed,5.0


In [22]:
df['Location'] = None

In [23]:
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,22.5,
Store 1,Kevyn,Kitty Litter,2.5,
Store 2,Vinod,Bird Seed,5.0,


## DataFrame indexing and loading

In [24]:
costs = df['Cost']

In [25]:
costs+=2

In [26]:
costs

Store 1    24.5
Store 1     4.5
Store 2     7.0
Name: Cost, dtype: float64

In [27]:
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,24.5,
Store 1,Kevyn,Kitty Litter,4.5,
Store 2,Vinod,Bird Seed,7.0,
