# Andrew Podhorecki
## 11/12/2020 WK12
### Python Pandas Series and DataFrames

In [1]:
import numpy as np

from pandas import Series,DataFrame
import pandas as pd

In [2]:
# Create a new Series.
# A Series is one-dimensional: a single column of values, each with an index label.
# It can be used like a dictionary (label -> value) but also supports
# array-style operations such as filtering and arithmetic.

S = Series(data=[2, 5, 7, -9, 8, 4])

In [3]:
# Display the Series: index labels on the left, values on the right, dtype last
S

0    2
1    5
2    7
3   -9
4    8
5    4
dtype: int64

In [4]:
# .values returns only the data, as a NumPy array (no index labels)
S.values

array([ 2,  5,  7, -9,  8,  4], dtype=int64)

In [5]:
# .index returns the index labels; no index was given for S, so it is a
# default RangeIndex counting from 0
S.index

RangeIndex(start=0, stop=6, step=1)

In [6]:
# Build a Series from values plus explicit index labels (one country per value).

summer2018_olympic = Series(
    data=[2520, 1122, 937, 847, 713],
    index=['USA', 'Russia', 'Germany', 'UK', 'France'],
)

summer2018_olympic

USA        2520
Russia     1122
Germany     937
UK          847
France      713
dtype: int64

In [7]:
# Look up a single value by its index label, like a dictionary key
summer2018_olympic['USA']

2520

In [8]:
# Boolean filtering: keep only the entries whose value is greater than 800
summer2018_olympic[summer2018_olympic>800]

USA        2520
Russia     1122
Germany     937
UK          847
dtype: int64

In [9]:
# `in` tests membership against the index labels
'USA' in summer2018_olympic

True

In [10]:
# A label that is not in the index gives False
'Argentina' in summer2018_olympic

False

In [11]:
# Convert the Series to a plain dictionary: index labels become keys

summer2018_olympic_dict = summer2018_olympic.to_dict()

summer2018_olympic_dict

{'USA': 2520, 'Russia': 1122, 'Germany': 937, 'UK': 847, 'France': 713}

In [12]:
# Convert the dictionary back to a Series: keys become the index labels

summer2018_olympic_ser = Series(data=summer2018_olympic_dict)

summer2018_olympic_ser

USA        2520
Russia     1122
Germany     937
UK          847
France      713
dtype: int64

In [13]:
# The same countries in a different order, plus one new label ('Spain').
# pandas will still match each index label to its dictionary key.

countries = [
    'Germany',
    'Russia',
    'France',
    'Spain',
    'USA',
    'UK',
]

In [14]:
# Build a Series from the dictionary, ordered and labelled by `countries`
S2 = Series(data=summer2018_olympic_dict, index=countries)

In [15]:
# 'Spain' has no entry in the dictionary, so its value is NaN and the dtype is float64
S2

Germany     937.0
Russia     1122.0
France      713.0
Spain         NaN
USA        2520.0
UK          847.0
dtype: float64

In [16]:
# Check for null data: True where the value is missing (NaN)

pd.isnull(S2)

Germany    False
Russia     False
France     False
Spain       True
USA        False
UK         False
dtype: bool

In [17]:
# The inverse check: True where a value is present
pd.notnull(S2)

Germany     True
Russia      True
France      True
Spain      False
USA         True
UK          True
dtype: bool

In [18]:
# Show the original Series again, for comparison with S2
summer2018_olympic

USA        2520
Russia     1122
Germany     937
UK          847
France      713
dtype: int64

In [19]:
# Show S2 again: same labels in a different order, plus 'Spain'
S2

Germany     937.0
Russia     1122.0
France      713.0
Spain         NaN
USA        2520.0
UK          847.0
dtype: float64

In [20]:
# Adding two Series matches values by index label, not by position.
# A label missing from either side ('Spain') gives NaN, and the result
# comes back ordered by label.
summer2018_olympic + S2

France     1426.0
Germany    1874.0
Russia     2244.0
Spain         NaN
UK         1694.0
USA        5040.0
dtype: float64

In [21]:
# Give the Series a name; it is shown as "Name: ..." in the output

S2.name = 'Summer Olympic'

S2

Germany     937.0
Russia     1122.0
France      713.0
Spain         NaN
USA        2520.0
UK          847.0
Name: Summer Olympic, dtype: float64

In [22]:
# The index can be named too; the name is shown as a header above the labels

S2.index.name = 'Countries'

S2

Countries
Germany     937.0
Russia     1122.0
France      713.0
Spain         NaN
USA        2520.0
UK          847.0
Name: Summer Olympic, dtype: float64

In [23]:
# DataFrame: a table that can hold multiple columns.
# Each dictionary key becomes a column; each list supplies that column's rows.

data_dict = {
    'States': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'Years': [2000, 2001, 2002, 2001, 2002, 2003],
    'Population': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}

df = pd.DataFrame(data=data_dict)

df

Unnamed: 0,States,Years,Population
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [24]:
# head() with no argument returns the first 5 rows

df.head()

Unnamed: 0,States,Years,Population
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


In [25]:
# tail() with no argument returns the last 5 rows

df.tail()

Unnamed: 0,States,Years,Population
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [26]:
# Build the frame again with the columns in a chosen order

pd.DataFrame(data=data_dict, columns=['Years', 'States', 'Population'])

Unnamed: 0,Years,States,Population
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [27]:
# Naming a column that is not in the dictionary ('Debt') adds it as an
# empty column. An index can be specified as well (next cell).

df2 = pd.DataFrame(
    data=data_dict,
    columns=['Years', 'States', 'Population', 'Debt'],
)

df2

Unnamed: 0,Years,States,Population,Debt
0,2000,Ohio,1.5,
1,2001,Ohio,1.7,
2,2002,Ohio,3.6,
3,2001,Nevada,2.4,
4,2002,Nevada,2.9,
5,2003,Nevada,3.2,


In [28]:
# Same columns as before, now with explicit row labels instead of 0..5
df2 = pd.DataFrame(
    data=data_dict,
    columns=['Years', 'States', 'Population', 'Debt'],
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)

df2

Unnamed: 0,Years,States,Population,Debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [29]:
# List the column labels (this returns the labels themselves, not a count)

df2.columns

Index(['Years', 'States', 'Population', 'Debt'], dtype='object')

In [30]:
# Select a single column by label; the result is a Series named after the column

df2['States']

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: States, dtype: object

In [31]:
# Attribute-style access returns the same column as df2['States']
df2.States

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: States, dtype: object

In [32]:
# Attribute-style access to the 'Years' column
df2.Years

one      2000
two      2001
three    2002
four     2001
five     2002
six      2003
Name: Years, dtype: int64

In [33]:
# .loc selects a row by its index label; the row comes back as a Series
# whose labels are the column names

df2.loc['three']

Years         2002
States        Ohio
Population     3.6
Debt           NaN
Name: three, dtype: object

In [34]:
# Add values: assigning a single number to a column fills every row with it.
# NOTE: column labels are case-sensitive, so 'debt' creates a NEW column
# beside the existing (still empty) 'Debt' column rather than filling it.

df2['debt'] = 10.5

df2

Unnamed: 0,Years,States,Population,Debt,debt
one,2000,Ohio,1.5,,10.5
two,2001,Ohio,1.7,,10.5
three,2002,Ohio,3.6,,10.5
four,2001,Nevada,2.4,,10.5
five,2002,Nevada,2.9,,10.5
six,2003,Nevada,3.2,,10.5


In [35]:
# Fill the capitalised 'Debt' column; the lowercase 'debt' column is unchanged
df2['Debt'] = 13.8

df2

Unnamed: 0,Years,States,Population,Debt,debt
one,2000,Ohio,1.5,13.8,10.5
two,2001,Ohio,1.7,13.8,10.5
three,2002,Ohio,3.6,13.8,10.5
four,2001,Nevada,2.4,13.8,10.5
five,2002,Nevada,2.9,13.8,10.5
six,2003,Nevada,3.2,13.8,10.5


In [36]:
# Assign an array instead of a scalar: np.arange(6.) supplies one float
# per row (0.0 through 5.0), replacing the previous 'debt' values
df2['debt'] = np.arange(6.)

df2

Unnamed: 0,Years,States,Population,Debt,debt
one,2000,Ohio,1.5,13.8,0.0
two,2001,Ohio,1.7,13.8,1.0
three,2002,Ohio,3.6,13.8,2.0
four,2001,Nevada,2.4,13.8,3.0
five,2002,Nevada,2.9,13.8,4.0
six,2003,Nevada,3.2,13.8,5.0


In [37]:
# A Series that covers only some of df2's row labels
S3 = pd.Series(
    data=[-1.2, -1.5, -1.7],
    index=['two', 'four', 'five'],
)

S3

two    -1.2
four   -1.5
five   -1.7
dtype: float64

In [38]:
# Combining with missing values: assigning a Series matches on index labels,
# so rows whose label is not in S3 ('one', 'three', 'six') become NaN

df2['debt'] = S3

df2

Unnamed: 0,Years,States,Population,Debt,debt
one,2000,Ohio,1.5,13.8,
two,2001,Ohio,1.7,13.8,-1.2
three,2002,Ohio,3.6,13.8,
four,2001,Nevada,2.4,13.8,-1.5
five,2002,Nevada,2.9,13.8,-1.7
six,2003,Nevada,3.2,13.8,


In [42]:
# Nested dictionary: outer keys are states, inner keys are years.
# It holds the same population figures as data_dict, minus Nevada 2003.
# Each inner mapping must be written with braces, {year: value}; putting
# `key: value` pairs inside square brackets is a SyntaxError.

pop_dict = {'Nevada': {2001: 2.4, 2002: 2.9},
              'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

pop_dict

In [44]:
# NOTE(review): df2 is indexed by 'one'..'six', so none of the labels
# 2001/2002/2003 match and every cell in the result is empty (NaN).
# Presumably this was meant to be built from the nested pop_dict — TODO confirm.
DataFrame(df2,index = [2001,2002,2003])

Unnamed: 0,Years,States,Population,Debt,debt
2001,,,,,
2002,,,,,
2003,,,,,


In [47]:
# 'Ohio' is a value in the 'States' column, not a column label, so
# df2['Ohio'] raises KeyError. Select the Ohio rows with a boolean mask,
# take their 'Population' values, and drop the last one by position.
df2.loc[df2['States'] == 'Ohio', 'Population'].iloc[:-1]