In [1]:
import numpy as np
import pandas as pd

In [2]:
x = pd.Series([10, 20, 30, 40, 50])
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [3]:
x.index

RangeIndex(start=0, stop=5, step=1)

In [4]:
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [5]:
x.dtype

dtype('int64')

In [7]:
data = [450, 650, 870]
Sales = pd.Series(data, index=['Bob', 'Sally', 'Don'])
Sales

Bob      450
Sally    650
Don      870
dtype: int64

In [8]:
Sales.index

Index(['Bob', 'Sally', 'Don'], dtype='object')

In [9]:
Sales['Bob']

450

In [10]:
Sales[0]

450

In [11]:
Sales > 500

Bob      False
Sally     True
Don       True
dtype: bool

In [12]:
Sales[Sales>500]

Sally    650
Don      870
dtype: int64

In [13]:
'Don' in Sales

True

In [14]:
650 in Sales

False

In [15]:
sales_dict = Sales.to_dict()
sales_dict

{'Bob': 450, 'Sally': 650, 'Don': 870}

In [16]:
sales_ser = pd.Series(sales_dict)
sales_ser

Bob      450
Sally    650
Don      870
dtype: int64

In [18]:
new_sales = pd.Series(Sales, index=['Don', 'Sally', 'Lucy', 'Mike', 'Bob'])
new_sales

Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
dtype: float64

In [19]:
np.isnan(new_sales['Sally'])

False

In [20]:
pd.isnull(new_sales)

Don      False
Sally    False
Lucy      True
Mike      True
Bob      False
dtype: bool

In [21]:
new_sales.index.name = 'Sales Person'
new_sales

Sales Person
Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
dtype: float64

In [22]:
new_sales.name = 'Total Sales'
new_sales

Sales Person
Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
Name: Total Sales, dtype: float64

In [23]:
data = [['Don', 870], ['Sally', 678], ['Bob', 4756]]
df = pd.DataFrame(data, columns=['Name', 'Sales'])
df

Unnamed: 0,Name,Sales
0,Don,870
1,Sally,678
2,Bob,4756


In [25]:
df_dict = pd.DataFrame(sales_dict, index = [1])
df_dict

Unnamed: 0,Bob,Sally,Don
1,450,650,870


In [26]:
dict_list = [{'Name': 'Tom', 'Sales' : 250}, {'Name': 'Jane', 'Sales' :300},
             {'Name': 'Steve', 'Sales' : 350}, {'Name': 'Lucy', 'Sales' : 400}]

df_dict_list = pd.DataFrame(dict_list)
df_dict_list

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350
3,Lucy,400


In [27]:
east = pd.Series([1000,1200,3400],index=['Q1','Q2','Q3'])
west = pd.Series([1100,1300,2400,3500],index=['Q1','Q2','Q3','Q4'])
df_region = pd.DataFrame({'East':east, 'West':west})
df_region

Unnamed: 0,East,West
Q1,1000.0,1100
Q2,1200.0,1300
Q3,3400.0,2400
Q4,,3500


In [29]:
years = ['2015', '2016', '2017', '2018']
df_region['years'] = years
df_region

Unnamed: 0,East,West,years
Q1,1000.0,1100,2015
Q2,1200.0,1300,2016
Q3,3400.0,2400,2017
Q4,,3500,2018


In [30]:
df_region = df_region.set_index('years')
df_region

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,1000.0,1100
2016,1200.0,1300
2017,3400.0,2400
2018,,3500


In [31]:
new_df = df_region.reindex(['2014', '2015', '2016', '2017', '2018', '2019', '2020'])
new_df

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,
2020,,


In [33]:
new_df = new_df.reindex(columns = ['East', 'South', 'West'])
new_df

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2019,,,
2020,,,


In [37]:
new_df.fillna(method='bfill')

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,1000.0,,1100.0
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2019,,,
2020,,,


In [38]:
new_df.fillna(method='ffill')

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,3400.0,,3500.0
2019,3400.0,,3500.0
2020,3400.0,,3500.0


In [39]:
new_df.interpolate()

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,3400.0,,3500.0
2019,3400.0,,3500.0
2020,3400.0,,3500.0


In [42]:
new_df.dropna(axis = 1, how = 'all')

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,
2020,,


In [44]:
new_df.dropna(axis = 0, how = 'all')

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0


In [45]:
new_df.dropna(axis = 0, how = 'any')

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [50]:
new_df.dropna(axis = 1, thresh = 1)

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,
2020,,


In [51]:
new_df.drop('2019')

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2020,,,


In [52]:
new_df['East']

years
2014       NaN
2015    1000.0
2016    1200.0
2017    3400.0
2018       NaN
2019       NaN
2020       NaN
Name: East, dtype: float64

In [53]:
new_df.iloc[2]

East     1200.0
South       NaN
West     1300.0
Name: 2016, dtype: float64

In [54]:
new_df.iloc[2, 0]

1200.0

In [55]:
new_df.iloc[1:3]

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0


In [58]:
new_df.iloc[['2016', '2019']]

IndexError: positional indexers are out-of-bounds

In [61]:
new_df.sort_index(ascending = False)

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020,,,
2019,,,
2018,,,3500.0
2017,3400.0,,2400.0
2016,1200.0,,1300.0
2015,1000.0,,1100.0
2014,,,


In [64]:
new_df.sort_values(by = ['East', 'West'])

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2014,,,
2019,,,
2020,,,


In [67]:
var = pd.read_csv(r'C:\Users\luke1\Documents\Data21Notes\iris.csv')
var

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [70]:
var.groupby(by = 'species').mean()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,5.006,3.418,1.464,0.244
versicolor,5.936,2.77,4.26,1.326
virginica,6.588,2.974,5.552,2.026


In [72]:
var.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [75]:
var.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [95]:
var.drop('Rank', axis = 1)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,rank
0,5.1,3.5,1.4,0.2,setosa,17.5
1,4.9,3.0,1.4,0.2,setosa,17.5
2,4.7,3.2,1.3,0.2,setosa,8.0
3,4.6,3.1,1.5,0.2,setosa,30.5
4,5.0,3.6,1.4,0.2,setosa,17.5
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,117.5
146,6.3,2.5,5.0,1.9,virginica,106.5
147,6.5,3.0,5.2,2.0,virginica,117.5
148,6.2,3.4,5.4,2.3,virginica,121.5


In [113]:
var['rank'] = var['petal_length'].rank()
var.sort_values(by = 'rank')

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,rank
22,4.6,3.6,1.0,0.2,setosa,1.0
13,4.3,3.0,1.1,0.1,setosa,2.0
14,5.8,4.0,1.2,0.2,setosa,3.5
35,5.0,3.2,1.2,0.2,setosa,3.5
36,5.5,3.5,1.3,0.2,setosa,8.0
...,...,...,...,...,...,...
131,7.9,3.8,6.4,2.0,virginica,146.0
105,7.6,3.0,6.6,2.1,virginica,147.0
117,7.7,3.8,6.7,2.2,virginica,148.5
122,7.7,2.8,6.7,2.0,virginica,148.5


In [129]:
setosa = var[var['species'] == 'setosa'].reset_index()
setosa = setosa.drop(['index', 'rank'], axis = 1)

setosa

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [130]:
versicolor = var[var['species'] == 'versicolor'].reset_index()
versicolor = versicolor.drop(['index', 'rank'], axis = 1)

versicolor

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,7.0,3.2,4.7,1.4,versicolor
1,6.4,3.2,4.5,1.5,versicolor
2,6.9,3.1,4.9,1.5,versicolor
3,5.5,2.3,4.0,1.3,versicolor
4,6.5,2.8,4.6,1.5,versicolor
5,5.7,2.8,4.5,1.3,versicolor
6,6.3,3.3,4.7,1.6,versicolor
7,4.9,2.4,3.3,1.0,versicolor
8,6.6,2.9,4.6,1.3,versicolor
9,5.2,2.7,3.9,1.4,versicolor


In [131]:
virginica = var[var['species'] == 'virginica'].reset_index()
virginica = virginica.drop(['index', 'rank'], axis = 1)

virginica

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,6.3,3.3,6.0,2.5,virginica
1,5.8,2.7,5.1,1.9,virginica
2,7.1,3.0,5.9,2.1,virginica
3,6.3,2.9,5.6,1.8,virginica
4,6.5,3.0,5.8,2.2,virginica
5,7.6,3.0,6.6,2.1,virginica
6,4.9,2.5,4.5,1.7,virginica
7,7.3,2.9,6.3,1.8,virginica
8,6.7,2.5,5.8,1.8,virginica
9,7.2,3.6,6.1,2.5,virginica


In [141]:
sepal = var.iloc[:, [0, 1, 4]]
sepal

Unnamed: 0,sepal_length,sepal_width,species
0,5.1,3.5,setosa
1,4.9,3.0,setosa
2,4.7,3.2,setosa
3,4.6,3.1,setosa
4,5.0,3.6,setosa
...,...,...,...
145,6.7,3.0,virginica
146,6.3,2.5,virginica
147,6.5,3.0,virginica
148,6.2,3.4,virginica


In [142]:
petal = var.iloc[:, 2:5]
petal

Unnamed: 0,petal_length,petal_width,species
0,1.4,0.2,setosa
1,1.4,0.2,setosa
2,1.3,0.2,setosa
3,1.5,0.2,setosa
4,1.4,0.2,setosa
...,...,...,...
145,5.2,2.3,virginica
146,5.0,1.9,virginica
147,5.2,2.0,virginica
148,5.4,2.3,virginica


In [144]:
concat = pd.concat([setosa, versicolor, virginica], ignore_index = True)
concat

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [151]:
merge = sepal.merge(petal, how = 'outer', left_index = True, right_index = True)
merge

Unnamed: 0,sepal_length,sepal_width,species_x,petal_length,petal_width,species_y
0,5.1,3.5,setosa,1.4,0.2,setosa
1,4.9,3.0,setosa,1.4,0.2,setosa
2,4.7,3.2,setosa,1.3,0.2,setosa
3,4.6,3.1,setosa,1.5,0.2,setosa
4,5.0,3.6,setosa,1.4,0.2,setosa
...,...,...,...,...,...,...
145,6.7,3.0,virginica,5.2,2.3,virginica
146,6.3,2.5,virginica,5.0,1.9,virginica
147,6.5,3.0,virginica,5.2,2.0,virginica
148,6.2,3.4,virginica,5.4,2.3,virginica
