In [2]:
import numpy as np
import pandas as pd

In [3]:
# Build a Series from a plain list; with no index given, pandas attaches a
# default integer index (see the RangeIndex shown by `x.index` below).
x = pd.Series(data=[10, 20, 30, 40, 50])
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [4]:
# Inspect the index (row labels) attached to the Series.
x.index

RangeIndex(start=0, stop=5, step=1)

In [5]:
# The stored data, exposed as a NumPy array (no index attached).
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [6]:
# Element type shared by every value in the Series.
x.dtype

dtype('int64')

In [7]:
# Same constructor, this time with explicit string labels so each value can be
# looked up by sales person.
data = [450, 650, 870]
sales = pd.Series(data=data, index=['Bob', 'Sally', 'Don'])
sales

Bob      450
Sally    650
Don      870
dtype: int64

In [8]:
# With custom labels the index is a generic Index of strings rather than a RangeIndex.
sales.index

Index(['Bob', 'Sally', 'Don'], dtype='object')

In [9]:
# Label-based lookup, spelled with the explicit `.loc` accessor.
sales.loc['Bob']

450

In [10]:
# Positional lookup of the first element. `.iloc` is explicit about position;
# a bare integer key on a label-indexed Series (`sales[0]`) is deprecated since
# pandas 2.1 and is slated to be interpreted as a label instead.
sales.iloc[0]

450

In [11]:
# Element-wise "greater than" comparison; yields a boolean Series on the same index.
sales.gt(500)

Bob      False
Sally     True
Don       True
dtype: bool

In [12]:
# Boolean-mask selection: keep only the entries whose value exceeds 500.
sales.loc[sales > 500]

Sally    650
Don      870
dtype: int64

In [13]:
# `in` on a Series tests membership among the index labels.
'Don' in sales

True

In [15]:
# False even though 650 is one of the stored values: `in` only looks at the
# index labels. To test the values instead, use `650 in sales.values`.
650 in sales

False

In [16]:
# Convert the Series to a plain dict: index labels become keys, values stay values.
sales_dict = sales.to_dict()
sales_dict

{'Bob': 450, 'Sally': 650, 'Don': 870}

In [17]:
# Round trip: a dict passed to the Series constructor uses its keys as the index.
sales_series = pd.Series(data=sales_dict)
sales_series

Bob      450
Sally    650
Don      870
dtype: int64

In [18]:
# Re-align the existing figures to a larger roster. People with no record
# (Lucy, Mike) come back as NaN, and the result is float64 instead of int64.
new_sales = sales.reindex(index=['Don', 'Sally', 'Lucy', 'Mike', 'Bob'])
new_sales

Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
dtype: float64

In [22]:
# NumPy's element-wise NaN test applied to the Series; the result is a boolean
# Series that keeps the original index.
np.isnan(new_sales)

Don      False
Sally    False
Lucy      True
Mike      True
Bob      False
dtype: bool

In [21]:
# Missing-value mask via the pandas method form (`isnull` is an alias of `isna`).
new_sales.isna()

Don      False
Sally    False
Lucy      True
Mike      True
Bob      False
dtype: bool

In [24]:
# Give the index itself a name; it is shown as a header above the labels.
# This mutates new_sales in place.
new_sales.index.name = "Sales Person"
new_sales

Sales Person
Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
dtype: float64

In [25]:
# Name the Series itself; the name appears in the footer of the repr.
# This also mutates new_sales in place.
new_sales.name = 'Total Sales'
new_sales

Sales Person
Don      870.0
Sally    650.0
Lucy       NaN
Mike       NaN
Bob      450.0
Name: Total Sales, dtype: float64

In [26]:
# DataFrame from a list of rows: each inner list is one row, and `columns`
# names the fields in positional order.
# Note: this rebinds `data`, which previously held the flat list of sales figures.
data = [
    ['Don', 870],
    ['Sally', 678],
    ['Bob', 4756],
]
df = pd.DataFrame(data=data, columns=['Name', 'Sales'])
df

Unnamed: 0,Name,Sales
0,Don,870
1,Sally,678
2,Bob,4756


In [29]:
# DataFrame from a dict of scalars: the keys become the columns and
# index=[1] labels the single resulting row.
df_dict = pd.DataFrame(sales_dict, index=[1])
df_dict

Unnamed: 0,Bob,Sally,Don
1,450,650,870


In [31]:
# DataFrame from a list of records: every dict is one row and its keys are
# the column names.
dict_list = [
    {'Name': 'Tom', 'Sales': 250},
    {'Name': 'Jane', 'Sales': 300},
    {'Name': 'Steve', 'Sales': 350},
    {'Name': 'Lucy', 'Sales': 400},
]
df_dict_list = pd.DataFrame(data=dict_list)
df_dict_list

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350
3,Lucy,400


In [33]:
# Quarterly figures per region, keyed by quarter label.
# East deliberately has no Q4 entry, so the two Series have different lengths.
east = pd.Series({'Q1': 1000, 'Q2': 1200, 'Q3': 3400})
west = pd.Series({'Q1': 1100, 'Q2': 1300, 'Q3': 2400, 'Q4': 3500})

In [36]:
# Combine the two Series column-wise. Rows are aligned on the quarter labels,
# so East gets NaN for Q4 (and becomes float) while West stays integer.
df_region = pd.DataFrame(data=dict(East=east, West=west))
df_region

Unnamed: 0,East,West
Q1,1000.0,1100
Q2,1200.0,1300
Q3,3400.0,2400
Q4,,3500


In [37]:
# Attach one year label (as a string) per existing row as a new column.
# The list is assigned positionally and df_region is modified in place.
years = ['2015','2016','2017','2018']
df_region['years'] = years
df_region

Unnamed: 0,East,West,years
Q1,1000.0,1100,2015
Q2,1200.0,1300,2016
Q3,3400.0,2400,2017
Q4,,3500,2018


In [39]:
# Promote the 'years' column to the index; the Q1..Q4 labels are replaced by it.
# NOTE(review): df_region is rebound here and no longer has a 'years' column
# afterwards, so this cell is presumably not safe to run twice without first
# re-running the cells that build df_region.
df_region = df_region.set_index('years')
df_region

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,1000.0,1100
2016,1200.0,1300
2017,3400.0,2400
2018,,3500


In [40]:
# Re-align the rows to a wider span of years. Years absent from df_region
# (2014, 2019, 2020) are added as all-NaN rows.
new_df = df_region.reindex(index=['2014', '2015', '2016', '2017', '2018', '2019', '2020'])
new_df

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,
2020,,


In [41]:
# Reindexing also works on columns: 'South' does not exist yet, so it is
# inserted between East and West as an all-NaN column.
new_df = new_df.reindex(columns=['East','South','West'])
new_df

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2019,,,
2020,,,


In [42]:
# Replace every NaN with a constant. Returns a new frame; new_df is not modified.
new_df.fillna(0)

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,0.0,0.0,0.0
2015,1000.0,0.0,1100.0
2016,1200.0,0.0,1300.0
2017,3400.0,0.0,2400.0
2018,0.0,0.0,3500.0
2019,0.0,0.0,0.0
2020,0.0,0.0,0.0


In [45]:
# Forward-fill: each NaN takes the last valid value above it in the same column.
# Leading NaNs (2014) and the all-NaN South column have nothing to copy from,
# so they stay NaN.
# `DataFrame.ffill()` replaces `fillna(method='ffill')`, whose `method`
# argument is deprecated since pandas 2.1.
new_df.ffill()

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,3400.0,,3500.0
2019,3400.0,,3500.0
2020,3400.0,,3500.0


In [46]:
# Backward-fill: each NaN takes the next valid value below it in the same column.
# Trailing NaNs and the all-NaN South column have nothing to copy from,
# so they stay NaN.
# `DataFrame.bfill()` replaces `fillna(method='bfill')`, whose `method`
# argument is deprecated since pandas 2.1.
new_df.bfill()

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,1000.0,,1100.0
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2019,,,
2020,,,


In [47]:
# Interpolate missing values with the default settings. This frame has no gaps
# between valid values, so the only visible effect in the output below is that
# NaNs after the last valid value are filled with that value. The leading NaN
# row (2014) and the all-NaN South column are left untouched.
new_df.interpolate()

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,3400.0,,3500.0
2019,3400.0,,3500.0
2020,3400.0,,3500.0


In [49]:
# Drop columns that are entirely NaN (removes South only).
new_df.dropna(axis='columns', how='all')

Unnamed: 0_level_0,East,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2014,,
2015,1000.0,1100.0
2016,1200.0,1300.0
2017,3400.0,2400.0
2018,,3500.0
2019,,
2020,,


In [50]:
# Drop rows that are entirely NaN (removes 2014, 2019 and 2020).
new_df.dropna(axis='index', how='all')

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0


In [51]:
# Drop rows containing at least one NaN. Every row has NaN in the South column,
# so the result is an empty frame.
new_df.dropna(axis=0, how='any')

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [54]:
new_df.dropna(thresh=1)
# Keep a row only if it has at least one non-NaN value.

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0


In [55]:
new_df.drop('2019')
# Returns a copy without the row labelled '2019'; new_df itself is not modified.

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2020,,,


In [56]:
new_df['East']
# Returns the East column as a Series, indexed by year and named 'East'.

years
2014       NaN
2015    1000.0
2016    1200.0
2017    3400.0
2018       NaN
2019       NaN
2020       NaN
Name: East, dtype: float64

In [57]:
new_df.iloc[2]
# Positional row lookup: the row at position 2 (zero-based), which here is the
# one labelled '2016'. The row comes back as a Series keyed by column name.

East     1200.0
South       NaN
West     1300.0
Name: 2016, dtype: float64

In [58]:
new_df.iloc[2,0] # Scalar at row position 2, column position 0 (2016 / East)

1200.0

In [60]:
# Label-based row lookup; same row as `new_df.iloc[2]`. The key is the string
# '2016' because the year labels were created as strings.
new_df.loc['2016']

East     1200.0
South       NaN
West     1300.0
Name: 2016, dtype: float64

In [61]:
# Passing a list of labels selects several rows and returns a DataFrame.
new_df.loc[['2016','2019']]

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016,1200.0,,1300.0
2019,,,


In [62]:
# Display new_df again: the fill/drop/select calls above returned new objects
# and left new_df itself unchanged.
new_df

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014,,,
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2019,,,
2020,,,


In [63]:
# Sort rows by index label in descending order. `ascending` is a boolean flag
# that defaults to True, so pass False explicitly rather than relying on 0
# being falsy.
new_df.sort_index(ascending=False)

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020,,,
2019,,,
2018,,,3500.0
2017,3400.0,,2400.0
2016,1200.0,,1300.0
2015,1000.0,,1100.0
2014,,,


In [69]:
new_df.sort_values(by='East')
# Sorts rows by the values in a certain column; rows where East is NaN end up last.

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2014,,,
2018,,,3500.0
2019,,,
2020,,,


In [68]:
# Sort by East first, then use West to order rows that tie on East. Among the
# rows with NaN in East, 2018 comes first because it is the only one with a
# West value.
new_df.sort_values(by=['East','West'])

Unnamed: 0_level_0,East,South,West
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,1000.0,,1100.0
2016,1200.0,,1300.0
2017,3400.0,,2400.0
2018,,,3500.0
2014,,,
2019,,,
2020,,,
