In [3]:
import numpy as np
import pandas as pd

In [4]:
# Build a Series of five evenly spaced integers; no index is given, so pandas
# assigns the default 0..4 integer positions.
x = pd.Series(list(range(10, 60, 10)))

In [5]:
# Display the Series: index labels on the left, values on the right, dtype last.
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [6]:
# Inspect the automatically generated index of the Series.
x.index

RangeIndex(start=0, stop=5, step=1)

In [7]:
# Inspect the underlying values as a NumPy array.
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [8]:
# Inspect the element dtype of the Series.
x.dtype

dtype('int64')

In [9]:
# Per-person sales figures, labelled by salesperson name rather than by the
# default integer positions.
data = [450, 650, 870]
sales = pd.Series(data=data, index=['Bob', 'Sally', 'Don'])
sales

Bob      450
Sally    650
Don      870
dtype: int64

In [10]:
# Look up a single value by its index label.
sales['Bob']

450

In [11]:
# Look up the first value by position. `.iloc` is explicitly positional; plain
# `sales[0]` on a string-labelled Series relies on a deprecated integer
# fallback and emits a FutureWarning on recent pandas.
sales.iloc[0]

450

In [13]:
# Boolean-mask selection: keep only the entries whose value exceeds 500.
sales.loc[sales > 500]

Sally    650
Don      870
dtype: int64

In [14]:
# Membership with `in` on a Series tests the index labels, not the values.
'Don' in sales

True

In [15]:
# To test whether a value is present, check against the values array instead.
650 in sales.values

True

In [16]:
# Convert the Series to a plain dict mapping index label -> value.
sales_dict = sales.to_dict()
sales_dict

{'Bob': 450, 'Sally': 650, 'Don': 870}

In [17]:
# Round-trip: build a Series from the dict; the keys become the index labels.
sales_ser = pd.Series(sales_dict)
sales_ser

Bob      450
Sally    650
Don      870
dtype: int64

In [18]:
# Re-align the existing sales data to a new set of labels. Labels missing from
# `sales` ('Lucy', 'Mike') receive NaN, which is why the result is float64.
new_sales = sales.reindex(['Don', 'Sally', 'Lucy', 'Mike', 'Bob'])
new_sales

Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
dtype: float64

In [20]:
# Element-wise NaN test via NumPy; the result is a boolean Series on the same index.
np.isnan(new_sales)

Don      False
Sally    False
Lucy      True
Mike      True
Bob      False
dtype: bool

In [21]:
# The same missing-value mask, produced with pandas' own helper.
pd.isnull(new_sales)

Don      False
Sally    False
Lucy      True
Mike      True
Bob      False
dtype: bool

In [23]:
# Give the index a name (mutates new_sales in place); it is printed as a
# header line above the labels.
new_sales.index.name = 'Sales Person'
new_sales

Sales Person
Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
dtype: float64

In [24]:
# Name the Series itself (mutates new_sales in place); the name appears in the
# footer of the repr next to the dtype.
new_sales.name = 'Total Sales'
new_sales

Sales Person
Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
Name: Total Sales, dtype: float64

In [25]:
# Build a DataFrame from row-wise records: each inner list is one row and
# `columns` supplies the column labels.
data = [
    ['Don', 870],
    ['Sally', 678],
    ['Bob', 475],
]
df = pd.DataFrame(data=data, columns=['Name', 'Sales'])
df

Unnamed: 0,Name,Sales
0,Don,870
1,Sally,678
2,Bob,475


In [26]:
# Build a one-row DataFrame from a dict of scalars: each key becomes a column,
# and the explicit single-label index ('1') names the row.
df_dict = pd.DataFrame(sales_dict, index=['1'])
df_dict

Unnamed: 0,Bob,Sally,Don
1,450,650,870


In [27]:
# A list of per-row dicts: the keys become columns and each dict becomes one row.
dict_list = [
    {'Name': name, 'Sales': amount}
    for name, amount in [('Tom', 250), ('Jane', 300), ('Steve', 350), ('Lucy', 400)]
]

df_dict_list = pd.DataFrame(dict_list)
df_dict_list

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350
3,Lucy,400


In [28]:
# Quarterly sales per region, keyed by quarter label. `east` has no Q4 entry,
# so the two Series cover different sets of labels.
east = pd.Series({'Q1': 1000, 'Q2': 1200, 'Q3': 3400})
west = pd.Series({'Q1': 1100, 'Q2': 1300, 'Q3': 2400, 'Q4': 3500})

In [29]:
# Combine the two Series column-wise. Rows are aligned on the index labels, so
# East has no value for Q4 (NaN) and is therefore shown as float.
df_region = pd.DataFrame(data={'East': east, 'West': west})
df_region

Unnamed: 0,East,West
Q1,1000.0,1100
Q2,1200.0,1300
Q3,3400.0,2400
Q4,,3500


In [30]:
# Attach a year label to each row. The list is assigned positionally (one
# entry per existing row) and the labels are strings, not integers.
# Note: this adds the column to df_region in place.
years = ['2015', '2016', '2017', '2018']
df_region['years'] = years
df_region

Unnamed: 0,East,West,years
Q1,1000.0,1100,2015
Q2,1200.0,1300,2016
Q3,3400.0,2400,2017
Q4,,3500,2018


In [31]:
# Promote the 'years' column to be the row index, replacing the Q1..Q4 labels.
# set_index returns a new frame, so the name is rebound.
df_region = df_region.set_index('years')

In [32]:
# Display the frame, now indexed by year.
df_region

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,1000.0,1100
2016,1200.0,1300
2017,3400.0,2400
2018,,3500


In [33]:
# Re-align the rows to a wider range of year labels. Years not present in
# df_region ('2014', '2019') appear as all-NaN rows.
new_df = df_region.reindex(index=['2014', '2015', '2016', '2017', '2018', '2019'])
new_df

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,


In [34]:
# Re-align the columns as well. 'South' and 'North' do not exist yet, so they
# are created as all-NaN columns in the requested order.
new_df = new_df.reindex(['East', 'South', 'West', 'North'], axis='columns')
new_df

Unnamed: 0_level_0,East,South,West,North
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014,,,,
2015,1000.0,,1100.0,
2016,1200.0,,1300.0,
2017,3400.0,,2400.0,
2018,,,3500.0,
2019,,,,


In [35]:
# Replace every NaN with 0. This returns a new frame; new_df itself is unchanged.
new_df.fillna(0)

Unnamed: 0_level_0,East,South,West,North
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014,0.0,0.0,0.0,0.0
2015,1000.0,0.0,1100.0,0.0
2016,1200.0,0.0,1300.0,0.0
2017,3400.0,0.0,2400.0,0.0
2018,0.0,0.0,3500.0,0.0
2019,0.0,0.0,0.0,0.0


In [36]:
# Forward-fill: each NaN takes the last valid value above it in the same
# column; leading NaNs (nothing above them) stay NaN.
# `.ffill()` replaces `fillna(method='ffill')`, whose `method=` argument is
# deprecated in recent pandas. Returns a new frame; new_df is unchanged.
new_df.ffill()

Unnamed: 0_level_0,East,South,West,North
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014,,,,
2015,1000.0,,1100.0,
2016,1200.0,,1300.0,
2017,3400.0,,2400.0,
2018,3400.0,,3500.0,
2019,3400.0,,3500.0,


In [37]:
# Fill NaNs by interpolation along each column (default settings).
# In the output below, leading NaNs stay NaN and trailing NaNs take the last
# valid value; there are no interior gaps in this data, so the result happens
# to look the same as the forward-fill.
new_df.interpolate()

Unnamed: 0_level_0,East,South,West,North
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014,,,,
2015,1000.0,,1100.0,
2016,1200.0,,1300.0,
2017,3400.0,,2400.0,
2018,3400.0,,3500.0,
2019,3400.0,,3500.0,


In [38]:
# Drop columns (axis=1) in which every value is NaN - here South and North.
new_df.dropna(axis=1, how='all')

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,


In [39]:
# Keep only the rows that have at least one non-NaN value (thresh=1).
new_df.dropna(thresh=1)

Unnamed: 0_level_0,East,South,West,North
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,1000.0,,1100.0,
2016,1200.0,,1300.0,
2017,3400.0,,2400.0,
2018,,,3500.0,


In [40]:
# Select a single column by name; the result is a Series.
new_df['East']

years
2014       NaN
2015    1000.0
2016    1200.0
2017    3400.0
2018       NaN
2019       NaN
Name: East, dtype: float64

In [41]:
# Select a row by integer position (zero-based, so this is the third row).
new_df.iloc[2]

East     1200.0
South       NaN
West     1300.0
North       NaN
Name: 2016, dtype: float64

In [42]:
# Select a row by index label; the labels are strings, hence '2016' in quotes.
new_df.loc['2016']

East     1200.0
South       NaN
West     1300.0
North       NaN
Name: 2016, dtype: float64

In [43]:
# Show new_df again: the fillna / interpolate / dropna calls above each
# returned a new frame, so new_df still contains its NaNs.
new_df

Unnamed: 0_level_0,East,South,West,North
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014,,,,
2015,1000.0,,1100.0,
2016,1200.0,,1300.0,
2017,3400.0,,2400.0,
2018,,,3500.0,
2019,,,,


In [46]:
# Sort the rows by index label in descending order.
new_df.sort_index(ascending=False)

Unnamed: 0_level_0,East,South,West,North
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,,,,
2018,,,3500.0,
2017,3400.0,,2400.0,
2016,1200.0,,1300.0,
2015,1000.0,,1100.0,
2014,,,,


In [49]:
# Order the rows by ascending East sales; rows with NaN in East end up last.
new_df.sort_values(by='East')

Unnamed: 0_level_0,East,South,West,North
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015,1000.0,,1100.0,
2016,1200.0,,1300.0,
2017,3400.0,,2400.0,
2014,,,,
2018,,,3500.0,
2019,,,,


In [57]:
# Load the iris dataset; read_csv already returns a DataFrame.
# NOTE(review): absolute, machine-specific path - this cell only runs where
# that file exists. Consider a path relative to a configurable data directory.
data = pd.read_csv(r'C:\Users\eddie\Documents\iris.csv')
# Work on an independent copy. pd.DataFrame(data) does not copy DataFrame input
# by default, so later edits to `df` (such as adding a column) would not be
# isolated from `data`, which is still used as the untouched original below.
df = data.copy()
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [65]:
# Per-species mean of the remaining columns; 'species' becomes the row index.
df_species = df.groupby('species').mean()
df_species

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,5.006,3.418,1.464,0.244
versicolor,5.936,2.77,4.26,1.326
virginica,6.588,2.974,5.552,2.026


In [70]:
# Preview the first rows of the loaded data (head() defaults to five rows).
data_top = data.head()
data_top

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [71]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data_describe = data.describe()
data_describe

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [76]:
# Rank every row by petal_length and store it as a new 'rank' column on df
# (in-place mutation). Tied values share the average of their ranks, which is
# why fractional ranks such as 3.5 appear in the output.
df['rank'] = df['petal_length'].rank()
# sort_values returns a sorted copy for display only; df keeps its row order.
df.sort_values('rank')

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,rank
22,4.6,3.6,1.0,0.2,setosa,1.0
13,4.3,3.0,1.1,0.1,setosa,2.0
14,5.8,4.0,1.2,0.2,setosa,3.5
35,5.0,3.2,1.2,0.2,setosa,3.5
36,5.5,3.5,1.3,0.2,setosa,8.0
...,...,...,...,...,...,...
131,7.9,3.8,6.4,2.0,virginica,146.0
105,7.6,3.0,6.6,2.1,virginica,147.0
117,7.7,3.8,6.7,2.2,virginica,148.5
122,7.7,2.8,6.7,2.0,virginica,148.5


In [77]:
# Display df in its original row order, now including the 'rank' column.
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,rank
0,5.1,3.5,1.4,0.2,setosa,17.5
1,4.9,3.0,1.4,0.2,setosa,17.5
2,4.7,3.2,1.3,0.2,setosa,8.0
3,4.6,3.1,1.5,0.2,setosa,30.5
4,5.0,3.6,1.4,0.2,setosa,17.5
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,117.5
146,6.3,2.5,5.0,1.9,virginica,106.5
147,6.5,3.0,5.2,2.0,virginica,117.5
148,6.2,3.4,5.4,2.3,virginica,121.5


In [116]:
# Keep only the setosa rows and renumber them from 0.
# reset_index(drop=True) discards the old row labels directly, replacing the
# two-step reset_index() + drop(columns=['index']) and no longer depending on
# the old index being materialised under the column name 'index'.
setosa_df = df[df['species'] == 'setosa'].reset_index(drop=True)
setosa_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,rank
0,5.1,3.5,1.4,0.2,setosa,17.5
1,4.9,3.0,1.4,0.2,setosa,17.5
2,4.7,3.2,1.3,0.2,setosa,8.0
3,4.6,3.1,1.5,0.2,setosa,30.5
4,5.0,3.6,1.4,0.2,setosa,17.5
5,5.4,3.9,1.7,0.4,setosa,46.5
6,4.6,3.4,1.4,0.3,setosa,17.5
7,5.0,3.4,1.5,0.2,setosa,30.5
8,4.4,2.9,1.4,0.2,setosa,17.5
9,4.9,3.1,1.5,0.1,setosa,30.5


In [107]:
# Keep only the versicolor rows and renumber them from 0.
# reset_index(drop=True) discards the old row labels directly, replacing the
# two-step reset_index() + drop(columns=['index']) and no longer depending on
# the old index being materialised under the column name 'index'.
versicolor_df = df[df['species'] == 'versicolor'].reset_index(drop=True)
versicolor_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,7.0,3.2,4.7,1.4
1,6.4,3.2,4.5,1.5
2,6.9,3.1,4.9,1.5
3,5.5,2.3,4.0,1.3
4,6.5,2.8,4.6,1.5
5,5.7,2.8,4.5,1.3
6,6.3,3.3,4.7,1.6
7,4.9,2.4,3.3,1.0
8,6.6,2.9,4.6,1.3
9,5.2,2.7,3.9,1.4


In [110]:
# Keep only the virginica rows and renumber them from 0.
# reset_index(drop=True) discards the old row labels directly, replacing the
# two-step reset_index() + drop(columns=['index']) and no longer depending on
# the old index being materialised under the column name 'index'.
virginica_df = df[df['species'] == 'virginica'].reset_index(drop=True)
virginica_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,rank
0,6.3,3.3,6.0,2.5,virginica,140.5
1,5.8,2.7,5.1,1.9,virginica,112.5
2,7.1,3.0,5.9,2.1,virginica,138.5
3,6.3,2.9,5.6,1.8,virginica,128.5
4,6.5,3.0,5.8,2.2,virginica,136.0
5,7.6,3.0,6.6,2.1,virginica,147.0
6,4.9,2.5,4.5,1.7,virginica,83.5
7,7.3,2.9,6.3,1.8,virginica,145.0
8,6.7,2.5,5.8,1.8,virginica,136.0
9,7.2,3.6,6.1,2.5,virginica,143.0


In [112]:
# Split the measurements into two frames. Each one keeps every column of df
# except the pair it drops, so both still carry the remaining shared columns.
sepal_df = df.drop(['petal_length', 'petal_width'], axis=1)
petal_df = df.drop(['sepal_length', 'sepal_width'], axis=1)

In [118]:
# Stack the three per-species frames vertically (row-wise is the default
# axis), replacing their individual 0-based indexes with one running index.
concat_df = [setosa_df, versicolor_df, virginica_df]
whole_df = pd.concat(objs=concat_df, ignore_index=True, sort=False)
whole_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,rank
0,5.1,3.5,1.4,0.2,setosa,17.5
1,4.9,3.0,1.4,0.2,setosa,17.5
2,4.7,3.2,1.3,0.2,setosa,8.0
3,4.6,3.1,1.5,0.2,setosa,30.5
4,5.0,3.6,1.4,0.2,setosa,17.5
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,117.5
146,6.3,2.5,5.0,1.9,virginica,106.5
147,6.5,3.0,5.2,2.0,virginica,117.5
148,6.2,3.4,5.4,2.3,virginica,121.5


In [119]:
# Display the sepal-only frame (petal columns removed).
sepal_df

Unnamed: 0,sepal_length,sepal_width,species,rank
0,5.1,3.5,setosa,17.5
1,4.9,3.0,setosa,17.5
2,4.7,3.2,setosa,8.0
3,4.6,3.1,setosa,30.5
4,5.0,3.6,setosa,17.5
...,...,...,...,...
145,6.7,3.0,virginica,117.5
146,6.3,2.5,virginica,106.5
147,6.5,3.0,virginica,117.5
148,6.2,3.4,virginica,121.5


In [123]:
# Join the sepal and petal frames row-by-row on their shared index. Columns
# present in both frames get the default _x / _y suffixes; the duplicated ones
# are then dropped, leaving the petal-side rank under the name 'rank_y'.
merged_df = (
    sepal_df
    .merge(petal_df, left_index=True, right_index=True)
    .drop(columns=['species_x', 'rank_x', 'species_y'])
)
merged_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,rank_y
0,5.1,3.5,1.4,0.2,17.5
1,4.9,3.0,1.4,0.2,17.5
2,4.7,3.2,1.3,0.2,8.0
3,4.6,3.1,1.5,0.2,30.5
4,5.0,3.6,1.4,0.2,17.5
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,117.5
146,6.3,2.5,5.0,1.9,106.5
147,6.5,3.0,5.2,2.0,117.5
148,6.2,3.4,5.4,2.3,121.5
