In [5]:
import numpy as np 
import pandas as pd

In [6]:
x = pd.Series([10, 20, 30, 40, 50])
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [7]:
x.index

RangeIndex(start=0, stop=5, step=1)

In [8]:
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [9]:
x.dtype

dtype('int64')

In [10]:
data = [450, 650, 870]
Sales = pd.Series(data, index=['Bob', 'Sally', 'Don'])
Sales

Bob      450
Sally    650
Don      870
dtype: int64

In [11]:
Sales.index

Index(['Bob', 'Sally', 'Don'], dtype='object')

In [12]:
Sales['Bob']

450

In [13]:
Sales[0]

450

In [15]:
x[4]

50

In [17]:
Sales[Sales > 500]

Sally    650
Don      870
dtype: int64

In [18]:
'Don' in Sales

True

In [19]:
650 in Sales

False

In [20]:
'650' in Sales

False

In [21]:
sales_dict = Sales.to_dict()
sales_dict

{'Bob': 450, 'Sally': 650, 'Don': 870}

In [23]:
sales_ser = pd.Series(sales_dict)
sales_ser

Bob      450
Sally    650
Don      870
dtype: int64

In [24]:
new_sales = pd.Series(Sales, index=['Don', 'Sally', 'Lucy', 'Mike', 'Bob'])
new_sales

Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
dtype: float64

In [25]:
np.isnan(new_sales)

Don      False
Sally    False
Lucy      True
Mike      True
Bob      False
dtype: bool

In [26]:
pd.isnull(new_sales)

Don      False
Sally    False
Lucy      True
Mike      True
Bob      False
dtype: bool

In [28]:
new_sales.index.name = "Sales Person"
new_sales

Sales Person
Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
dtype: float64

In [29]:
new_sales.name = "Total Sales"

In [30]:
new_sales

Sales Person
Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
Name: Total Sales, dtype: float64

In [31]:
data = [['Don', 870], ['Sally', 678], ['Bob', 4756]]
df = pd.DataFrame(data, columns=['Name', 'Sales'])
df

Unnamed: 0,Name,Sales
0,Don,870
1,Sally,678
2,Bob,4756


In [32]:
sales_dict

{'Bob': 450, 'Sally': 650, 'Don': 870}

In [34]:
df_dict = pd.DataFrame(sales_dict,index=[1])
df_dict

Unnamed: 0,Bob,Sally,Don
1,450,650,870


In [37]:
dict_list = [{'Name':'Tom','Sales':250},{'Name':'Jane','Sales':300},{'Name':'Steve','Sales':350}
            ,{'Name':'Tom','Sales':400}]

df_dict_list = pd.DataFrame(dict_list)
df_dict_list

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350
3,Tom,400


In [38]:
east = pd.Series([1000,1200,3400],index=['Q1','Q2','Q3'])
west = pd.Series([1100,1300,2400,3500],index=['Q1','Q2','Q3','Q4'])

In [39]:
df_region = pd.DataFrame({'East':east,'West':west})
df_region

Unnamed: 0,East,West
Q1,1000.0,1100
Q2,1200.0,1300
Q3,3400.0,2400
Q4,,3500


In [40]:
years = ['2015','2016','2017','2018']
df_region['years'] = years
df_region

Unnamed: 0,East,West,years
Q1,1000.0,1100,2015
Q2,1200.0,1300,2016
Q3,3400.0,2400,2017
Q4,,3500,2018


In [41]:
df_region = df_region.set_index('years')
df_region

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,1000.0,1100
2016,1200.0,1300
2017,3400.0,2400
2018,,3500


In [53]:
new_df = df_region.reindex(['2014','2015','2016','2017','2018','2019','2020'])
new_df

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,
2020,,


In [56]:
new_df = new_df.reindex(columns=['East','South','West'])
new_df

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2019,,,
2020,,,


In [50]:
new_df = new_df.dropna(how='all')


In [51]:
new_df

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0


In [52]:
new_df = new_df.dropna(how='all', axis=1)
new_df

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0


In [54]:
new_df

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,
2020,,


In [55]:
new_df.dropna()

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0


In [57]:
new_df.dropna()

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [58]:
new_df.dropna(thresh=3)

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [59]:
new_df

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2019,,,
2020,,,


In [60]:
new_df.dropna(subset=['East','West'])

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0


In [65]:
new_df.interpolate()

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,3400.0,,3500.0
2019,3400.0,,3500.0
2020,3400.0,,3500.0


In [70]:
new_df.drop('2019')

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2020,,,


In [71]:
new_df['East']

years
2014       NaN
2015    1000.0
2016    1200.0
2017    3400.0
2018       NaN
2019       NaN
2020       NaN
Name: East, dtype: float64

In [75]:
new_df.iloc[2]

East     1200.0
South       NaN
West     1300.0
Name: 2016, dtype: float64

In [76]:
new_df.loc[['2016', '2019']]

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016,1200.0,,1300.0
2019,,,


In [77]:
new_df

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2019,,,
2020,,,


In [79]:
new_df.sort_index(ascending=False)

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020,,,
2019,,,
2018,,,3500.0
2017,3400.0,,2400.0
2016,1200.0,,1300.0
2015,1000.0,,1100.0
2014,,,


In [84]:
new_df.sort_values(by=['East','West'])

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2014,,,
2019,,,
2020,,,


In [85]:
'groups of 3-4'
'iris.csv'
'pandas readcsv method in order to read in iris csv'
'put iris.csv into a DataFrame'
'look at pandas group by method'
'have a look at .describe'
'what that looks like on normal iris and then the grouped by iris'

'what that looks like on normal iris and then the grouped by iris'

In [99]:
iris = pd.read_csv('Iris.csv')
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,5.1,3.5,1.4,0.2,setosa,,,,,,
1,4.9,3.0,1.4,0.2,setosa,,,,,,
2,4.7,3.2,1.3,0.2,setosa,,,,,,
3,4.6,3.1,1.5,0.2,setosa,,,,,,
4,5.0,3.6,1.4,0.2,setosa,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,,,,,,
146,6.3,2.5,5.0,1.9,virginica,,,,,,
147,6.5,3.0,5.2,2.0,virginica,,,,,,
148,6.2,3.4,5.4,2.3,virginica,,,,,,


In [102]:
iris = iris.dropna(how='all', axis=1)
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [103]:
iris_df = pd.DataFrame(iris)
iris_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [107]:
iris_df_groupbymean = iris_df.groupby(['species']).mean()
iris_df_groupbymean

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,5.006,3.418,1.464,0.244
versicolor,5.936,2.77,4.26,1.326
virginica,6.588,2.974,5.552,2.026


In [111]:
iris_df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [113]:
iris_df_groupbymean.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,3.0,3.0,3.0,3.0
mean,5.843333,3.054,3.758667,1.198667
std,0.795061,0.331325,2.089602,0.897798
min,5.006,2.77,1.464,0.244
25%,5.471,2.872,2.862,0.785
50%,5.936,2.974,4.26,1.326
75%,6.262,3.196,4.906,1.676
max,6.588,3.418,5.552,2.026


In [114]:
iris_df_groupbymean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, setosa to virginica
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  3 non-null      float64
 1   sepal_width   3 non-null      float64
 2   petal_length  3 non-null      float64
 3   petal_width   3 non-null      float64
dtypes: float64(4)
memory usage: 120.0+ bytes


In [115]:
iris_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [121]:
iris_df['petal_length_ranked'] = iris_df['petal_length'].rank()
iris_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,petal_length_ranked
0,5.1,3.5,1.4,0.2,setosa,17.5
1,4.9,3.0,1.4,0.2,setosa,17.5
2,4.7,3.2,1.3,0.2,setosa,8.0
3,4.6,3.1,1.5,0.2,setosa,30.5
4,5.0,3.6,1.4,0.2,setosa,17.5
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,117.5
146,6.3,2.5,5.0,1.9,virginica,106.5
147,6.5,3.0,5.2,2.0,virginica,117.5
148,6.2,3.4,5.4,2.3,virginica,121.5


In [126]:
iris_df_sorted = iris_df.sort_values(by=['petal_length_ranked'])
iris_df_sorted

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,petal_length_ranked
22,4.6,3.6,1.0,0.2,setosa,1.0
13,4.3,3.0,1.1,0.1,setosa,2.0
14,5.8,4.0,1.2,0.2,setosa,3.5
35,5.0,3.2,1.2,0.2,setosa,3.5
36,5.5,3.5,1.3,0.2,setosa,8.0
...,...,...,...,...,...,...
131,7.9,3.8,6.4,2.0,virginica,146.0
105,7.6,3.0,6.6,2.1,virginica,147.0
117,7.7,3.8,6.7,2.2,virginica,148.5
122,7.7,2.8,6.7,2.0,virginica,148.5


In [145]:
setosa_df = iris_df[iris_df['species'] == 'setosa'].reset_index()
setosa_df = setosa_df.drop(columns=['index','petal_length_ranked'])
setosa_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [146]:
versicolor_df = iris_df[iris_df['species'] == 'versicolor'].reset_index()
versicolor_df = versicolor_df.drop(columns=['index','petal_length_ranked'])
versicolor_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,7.0,3.2,4.7,1.4,versicolor
1,6.4,3.2,4.5,1.5,versicolor
2,6.9,3.1,4.9,1.5,versicolor
3,5.5,2.3,4.0,1.3,versicolor
4,6.5,2.8,4.6,1.5,versicolor
5,5.7,2.8,4.5,1.3,versicolor
6,6.3,3.3,4.7,1.6,versicolor
7,4.9,2.4,3.3,1.0,versicolor
8,6.6,2.9,4.6,1.3,versicolor
9,5.2,2.7,3.9,1.4,versicolor


In [147]:
virginica_df = iris_df[iris_df['species'] == 'virginica'].reset_index()
virginica_df = virginica_df.drop(columns=['index','petal_length_ranked'])
virginica_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,6.3,3.3,6.0,2.5,virginica
1,5.8,2.7,5.1,1.9,virginica
2,7.1,3.0,5.9,2.1,virginica
3,6.3,2.9,5.6,1.8,virginica
4,6.5,3.0,5.8,2.2,virginica
5,7.6,3.0,6.6,2.1,virginica
6,4.9,2.5,4.5,1.7,virginica
7,7.3,2.9,6.3,1.8,virginica
8,6.7,2.5,5.8,1.8,virginica
9,7.2,3.6,6.1,2.5,virginica


In [148]:
sepal_df = iris_df.drop(columns=['petal_length','petal_width'])
sepal_df

Unnamed: 0,sepal_length,sepal_width,species,petal_length_ranked
0,5.1,3.5,setosa,17.5
1,4.9,3.0,setosa,17.5
2,4.7,3.2,setosa,8.0
3,4.6,3.1,setosa,30.5
4,5.0,3.6,setosa,17.5
...,...,...,...,...
145,6.7,3.0,virginica,117.5
146,6.3,2.5,virginica,106.5
147,6.5,3.0,virginica,117.5
148,6.2,3.4,virginica,121.5


In [149]:
petal_df = iris_df.drop(columns=['sepal_length','sepal_width'])
petal_df

Unnamed: 0,petal_length,petal_width,species,petal_length_ranked
0,1.4,0.2,setosa,17.5
1,1.4,0.2,setosa,17.5
2,1.3,0.2,setosa,8.0
3,1.5,0.2,setosa,30.5
4,1.4,0.2,setosa,17.5
...,...,...,...,...
145,5.2,2.3,virginica,117.5
146,5.0,1.9,virginica,106.5
147,5.2,2.0,virginica,117.5
148,5.4,2.3,virginica,121.5


In [169]:
concat_species = pd.concat([setosa_df, versicolor_df, virginica_df], axis=0, ignore_index=True, sort=False)

concat_species

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,petal_length_ranked
0,5.1,3.5,1.4,0.2,setosa,
1,4.9,3.0,1.4,0.2,setosa,
2,4.7,3.2,1.3,0.2,setosa,
3,4.6,3.1,1.5,0.2,setosa,
4,5.0,3.6,1.4,0.2,setosa,
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,117.5
146,6.3,2.5,5.0,1.9,virginica,106.5
147,6.5,3.0,5.2,2.0,virginica,117.5
148,6.2,3.4,5.4,2.3,virginica,121.5


In [166]:
merge_species = pd.merge(sepal_df, petal_df, left_index=True, right_index=True)
merge_species = merge_species.drop(columns=['petal_length_ranked_x','petal_length_ranked_y','species_x'])
merge_species

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species_y
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
