In [1]:
import webbrowser                                         #Opens URLs in the default web browser

import numpy as np
import pandas as pd
from pandas import Series, DataFrame                      #Imported separately -> instead of using pd.Series etc.

In [5]:
# Series: a one-dimensional array in which every value carries an index label.
test = Series([2, 4, 6, 8, 10])
# Show the full Series, then only its values, then only its index. Passing the
# extra '\n' with sep='\n' emits the same two blank lines between sections that
# a separate print('\n') call would.
print(test, '\n', sep='\n')
print(test.values, '\n', sep='\n')
print(test.index)

0     2
1     4
2     6
3     8
4    10
dtype: int64


[ 2  4  6  8 10]


RangeIndex(start=0, stop=5, step=1)


In [15]:
# A Series with explicit string labels instead of the default integer index.
test2 = Series(
    [55_650_000, 25_250_000, 78_000_000, 500_000, 250_000],
    index=['USA', 'Germany', 'UK', 'China', 'Ireland'],
)
print(test2, '\n', sep='\n')
# Look a value up by its label, then add two looked-up values together.
print(test2['UK'])
print(test2['Germany'] + test2['UK'], '\n', sep='\n')
# An element-wise comparison returns a boolean Series with the same labels.
print(test2 > 1_000_000, '\n', sep='\n')
# `in` tests membership against the index labels.
print('USA' in test2)

USA        55650000
Germany    25250000
UK         78000000
China        500000
Ireland      250000
dtype: int64


78000000
103250000


USA         True
Germany     True
UK          True
China      False
Ireland    False
dtype: bool


True


In [17]:
# Round trip: Series -> dictionary (index labels become the keys) ...
test2_dict = test2.to_dict()
print(test2_dict, '\n', sep='\n')
# ... and dictionary -> Series again (keys become the index labels).
test2_series = Series(test2_dict)
print(test2_series)

{'USA': 55650000, 'Germany': 25250000, 'UK': 78000000, 'China': 500000, 'Ireland': 250000}


USA        55650000
Germany    25250000
UK         78000000
China        500000
Ireland      250000
dtype: int64


In [22]:
# Labels used as an explicit index in the next cell.
countries = [
    'Ireland',
    'UK',
    'Holland',
    'Sweden',
    'Finland',
    'USA',
]

In [23]:
# Build a Series from the dictionary using `countries` as the index. Labels in
# `countries` with no matching dictionary key become NaN (so the dtype turns
# into float64); dictionary keys that are not in `countries` are left out.
test3 = Series(data=test2_dict, index=countries)
print(test3)

Ireland      250000.0
UK         78000000.0
Holland           NaN
Sweden            NaN
Finland           NaN
USA        55650000.0
dtype: float64


In [24]:
# Boolean mask: True wherever test3 holds a missing (NaN) value.
test3.isnull()

Ireland    False
UK         False
Holland     True
Sweden      True
Finland     True
USA        False
dtype: bool

In [27]:
# Adding two Series aligns them on their index labels first. A label that is
# missing from either operand, or whose value is NaN, is NaN in the result.
print(test2_series, '\n', sep='\n')
print(test3, '\n', sep='\n')
print(test2_series + test3)

USA        55650000
Germany    25250000
UK         78000000
China        500000
Ireland      250000
dtype: int64


Ireland      250000.0
UK         78000000.0
Holland           NaN
Sweden            NaN
Finland           NaN
USA        55650000.0
dtype: float64


China              NaN
Finland            NaN
Germany            NaN
Holland            NaN
Ireland       500000.0
Sweden             NaN
UK         156000000.0
USA        111300000.0
dtype: float64


In [28]:
# Give the Series a name; it is printed in the footer next to the dtype.
test2.name = 'Random example'
print(test2)

USA        55650000
Germany    25250000
UK         78000000
China        500000
Ireland      250000
Name: Random example, dtype: int64


In [29]:
# Name the second Series as well; the name appears in the printed footer.
test3.name = 'Countries'
print(test3)

Ireland      250000.0
UK         78000000.0
Holland           NaN
Sweden            NaN
Finland           NaN
USA        55650000.0
Name: Countries, dtype: float64


In [33]:
#DataFrame
# Open the Wikipedia page that holds the NFL win-loss table in the default
# browser. `webbrowser` is imported together with the other modules in the
# first cell, so every dependency of the notebook is visible in one place.
websites = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(websites)

True

In [37]:
# Parse whatever tabular text is currently on the system clipboard into a
# DataFrame and display it.
# NOTE(review): presumably the table was copied by hand from the page opened in
# the previous cell; that manual step cannot be reproduced by "Run All" on a
# fresh kernel -- confirm, and consider loading the data from a saved file.
nfl_data = pd.read_clipboard()
nfl_data

  return read_table(StringIO(text), sep=sep, **kwargs)


Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,882,502,374,6,0.573,1960,NFC East
1,2,Green Bay Packers,1336,737,562,37,0.565,1921,NFC North
2,3,Chicago Bears,1370,749,579,42,0.562,1920,NFC North
3,4,Miami Dolphins,800,445,351,4,0.559,1966,AFC East
4,5,New England Patriots[b],884,489,386,9,0.558,1960,AFC East


In [40]:
# List the column labels of the frame.
print(nfl_data.columns)

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Division'],
      dtype='object')


In [89]:
# Attribute-style column access. It only works when the column label is a valid
# Python identifier; a label containing a space (e.g. 'First NFL Season') has
# to be selected with square brackets instead.
nfl_data.Team

0             Dallas Cowboys
1          Green Bay Packers
2              Chicago Bears
3             Miami Dolphins
4    New England Patriots[b]
Name: Team, dtype: object

In [92]:
# Another single-word column selected with attribute-style access.
nfl_data.Tied

0     6
1    37
2    42
3     4
4     9
Name: Tied, dtype: int64

In [45]:
# Bracket-style column access: required here because the label contains spaces.
print(nfl_data['First NFL Season'])

0    1960
1    1921
2    1920
3    1966
4    1960
Name: First NFL Season, dtype: int64


In [49]:
# Build a new frame containing only the listed columns, in the listed order.
# A requested column that does not exist in nfl_data (here 'Stadium') is
# created and filled with null (NaN) values rather than raising an error.
# NOTE(review): the saved output below still shows a 'Games Played' column, so
# it predates the current column list -- re-run this cell to refresh it.
nfl_data.reindex(columns=['Team', 'First NFL Season', 'GP', 'Stadium'])

Unnamed: 0,Team,First NFL Season,GP,Games Played
0,Dallas Cowboys,1960,882,
1,Green Bay Packers,1921,1336,
2,Chicago Bears,1920,1370,
3,Miami Dolphins,1966,800,
4,New England Patriots[b],1960,884,


In [83]:
# First rows of the frame. head() and tail() both return 5 rows when called
# without an argument.
nfl_data.head()

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,882,502,374,6,0.573,1960,NFC East
1,2,Green Bay Packers,1336,737,562,37,0.565,1921,NFC North
2,3,Chicago Bears,1370,749,579,42,0.562,1920,NFC North
3,4,Miami Dolphins,800,445,351,4,0.559,1966,AFC East
4,5,New England Patriots[b],884,489,386,9,0.558,1960,AFC East


In [85]:
# Last 2 rows of the frame. Leave the call as the bare last expression of the
# cell so the notebook renders it as a table; wrapping head()/tail() in print()
# would produce plain text instead.
nfl_data.tail(2)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
3,4,Miami Dolphins,800,445,351,4,0.559,1966,AFC East
4,5,New England Patriots[b],884,489,386,9,0.558,1960,AFC East


In [88]:
# Select one row by integer position (position 2 is the third row); the row is
# returned as a Series labelled by the column names. Use `.iloc` for positions
# and `.loc` for labels -- both replace the older `.ix` indexer.
nfl_data.iloc[2]

Rank                            3
Team                Chicago Bears
GP                          1,370
Won                           749
Lost                          579
Tied                           42
Pct.                        0.562
First NFL Season             1920
Division                NFC North
Name: 2, dtype: object

In [58]:
# Create a new 'Stadium' column from a single string; the scalar is repeated in
# every row. This modifies nfl_data in place.
nfl_data['Stadium'] = 'University of Phoenix Stadium'
nfl_data

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,882,502,374,6,0.573,1960,NFC East,University of Phoenix Stadium
1,2,Green Bay Packers,1336,737,562,37,0.565,1921,NFC North,University of Phoenix Stadium
2,3,Chicago Bears,1370,749,579,42,0.562,1920,NFC North,University of Phoenix Stadium
3,4,Miami Dolphins,800,445,351,4,0.559,1966,AFC East,University of Phoenix Stadium
4,5,New England Patriots[b],884,489,386,9,0.558,1960,AFC East,University of Phoenix Stadium


In [64]:
# Overwrite the 'Stadium' column with one integer per row (0, 1, 2, ...).
# The length comes from the frame itself rather than a hard-coded 5: assigning
# an array whose length differs from the number of rows raises a ValueError,
# and the row count depends on what was read from the clipboard.
nfl_data['Stadium'] = np.arange(len(nfl_data))
nfl_data

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,882,502,374,6,0.573,1960,NFC East,0
1,2,Green Bay Packers,1336,737,562,37,0.565,1921,NFC North,1
2,3,Chicago Bears,1370,749,579,42,0.562,1920,NFC North,2
3,4,Miami Dolphins,800,445,351,4,0.559,1966,AFC East,3
4,5,New England Patriots[b],884,489,386,9,0.558,1960,AFC East,4


In [73]:
# A Series of stadium names that only carries the index labels 1, 2 and 3.
example = Series(
    ['Bank of America Stadium', 'University of Phoenix Stadium', 'Bears Field'],
    index=[1, 2, 3],
)
print(example)

1          Bank of America Stadium
2    University of Phoenix Stadium
3                      Bears Field
dtype: object


In [74]:
# Assign a Series to the column: values are matched to rows by index label, not
# by position. `example` only has labels 1, 2 and 3, so the rows labelled 0 and
# 4 are left without a value.
nfl_data['Stadium'] = example
nfl_data

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,882,502,374,6,0.573,1960,NFC East,
1,2,Green Bay Packers,1336,737,562,37,0.565,1921,NFC North,Bank of America Stadium
2,3,Chicago Bears,1370,749,579,42,0.562,1920,NFC North,University of Phoenix Stadium
3,4,Miami Dolphins,800,445,351,4,0.559,1966,AFC East,Bears Field
4,5,New England Patriots[b],884,489,386,9,0.558,1960,AFC East,


In [75]:
# Remove the 'Stadium' column from the frame in place.
del nfl_data['Stadium']
nfl_data

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,882,502,374,6,0.573,1960,NFC East
1,2,Green Bay Packers,1336,737,562,37,0.565,1921,NFC North
2,3,Chicago Bears,1370,749,579,42,0.562,1920,NFC North
3,4,Miami Dolphins,800,445,351,4,0.559,1966,AFC East
4,5,New England Patriots[b],884,489,386,9,0.558,1960,AFC East


In [94]:
# DataFrames & dictionaries: passing a dict of equal-length lists to DataFrame
# turns each key into a column label and each list into that column's values.
data_set = {
    'City': ['DUB', 'LA', 'PHX'],
    'Population': ['5', '10', '20'],
}
city_data = DataFrame(data_set)
# Displayed as a table because it is a DataFrame.
city_data

Unnamed: 0,City,Population
0,DUB,5
1,LA,10
2,PHX,20


In [95]:
# For contrast: without the DataFrame constructor the name is simply bound to
# the ordinary dictionary, so the cell shows a plain dict repr, not a table.
data_set = dict(City=['DUB', 'LA', 'PHX'], Population=['5', '10', '20'])
city_data = data_set
city_data

{'City': ['DUB', 'LA', 'PHX'], 'Population': ['5', '10', '20']}