## Series

In [1]:
import pandas as pd
import numpy as np

In [2]:
x = pd.Series([10, 20, 30, 40, 50])
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [3]:
x.index

RangeIndex(start=0, stop=5, step=1)

In [4]:
x.dtype

dtype('int64')

In [5]:
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [6]:
data = [450, 650, 870]
#index = ['Don', 'Mike', 'Edwin']
index = [2010, 2011, 2012]
sales = pd.Series(data, index=index)
sales.index.name = "Year"
sales.name = "Total Sales per year"
sales

Year
2010    450
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [7]:
sales.index

Int64Index([2010, 2011, 2012], dtype='int64', name='Year')

In [8]:
sales[2012]
sales

Year
2010    450
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [9]:
sales.iloc[0]

450

In [10]:
sales[2010]

450

In [11]:
sales.loc[2011] # index values

650

In [12]:
sales.iloc[0] # index locations / order

450

In [13]:
sales

Year
2010    450
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [14]:
sales > 500

Year
2010    False
2011     True
2012     True
Name: Total Sales per year, dtype: bool

In [15]:
sales[[False, True, True]]

Year
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [16]:
sales[sales>500]

Year
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [17]:
list(sales [sales > 500].values)

[650, 870]

In [18]:
650 in sales

False

In [19]:
2011 in sales

True

In [20]:
650 in sales.values

True

In [21]:
sales.to_dict()

{2010: 450, 2011: 650, 2012: 870}

In [22]:
sales_dict = {
    "Don" : 534,
    "Mike" : 453,
    "Edwin" : 412
    
}
sales_ser = pd.Series(sales_dict)
sales_ser

Don      534
Mike     453
Edwin    412
dtype: int64

In [23]:
new_sales = pd.Series(sales_dict, index=["Don", "Mike", "Sally", "Edwin", "Lucy"])
new_sales

Don      534.0
Mike     453.0
Sally      NaN
Edwin    412.0
Lucy       NaN
dtype: float64

In [24]:
np.isnan(new_sales)

Don      False
Mike     False
Sally     True
Edwin    False
Lucy      True
dtype: bool

In [25]:
pd.isnull(new_sales)

Don      False
Mike     False
Sally     True
Edwin    False
Lucy      True
dtype: bool

In [26]:
new_sales.loc["Sally"] is None

False

In [27]:
n = np.nan
n

nan

In [28]:
type(n)

float

In [29]:
new_sales

Don      534.0
Mike     453.0
Sally      NaN
Edwin    412.0
Lucy       NaN
dtype: float64

In [30]:
new_sales.loc["Sally"] = 548
new_sales

Don      534.0
Mike     453.0
Sally    548.0
Edwin    412.0
Lucy       NaN
dtype: float64

In [31]:
new_sales.index.name = "Sales Person"

In [32]:
new_sales.name = "Total TV Sales"

In [33]:
new_sales

Sales Person
Don      534.0
Mike     453.0
Sally    548.0
Edwin    412.0
Lucy       NaN
Name: Total TV Sales, dtype: float64

## DataFrames
- Two-dimensional
- Size mutable
- Heterogenous
- Rows and columns(records and series)

In [34]:
sales_df = pd.DataFrame(new_sales)

In [35]:
sales_df

Unnamed: 0_level_0,Total TV Sales
Sales Person,Unnamed: 1_level_1
Don,534.0
Mike,453.0
Sally,548.0
Edwin,412.0
Lucy,


In [36]:
data = [['Adrian', 20], ['Bethany', 23], ['Chloe', 41]]
df = pd.DataFrame(data, columns=["Name", "Age"], dtype=int)
df

  df = pd.DataFrame(data, columns=["Name", "Age"], dtype=int)


Unnamed: 0,Name,Age
0,Adrian,20
1,Bethany,23
2,Chloe,41


In [37]:
new_dict = {
    "Name": ["Tom", "Jane", "Steve", "Lucy"],
    "Sales": [250, 300, 350, 420]
}

df_dict = pd.DataFrame(new_dict)
df_dict

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350
3,Lucy,420


In [38]:
list_dicts = [
    {"Name": "Tom", "Sales": 300},
    {"Name": "Greg", "Sales": 345},
    {"Name": "Simone", "Sales": 745},
    {"Name": "Paula", "Sales": 542, "Performance": "Outstanding"},
    {"Name": "Yuri", "Sales": 453},
]

df_list_dicts = pd.DataFrame(list_dicts)
df_list_dicts

Unnamed: 0,Name,Sales,Performance
0,Tom,300,
1,Greg,345,
2,Simone,745,
3,Paula,542,Outstanding
4,Yuri,453,


In [39]:
df_list_dicts["Sales"]

0    300
1    345
2    745
3    542
4    453
Name: Sales, dtype: int64

In [40]:
df_list_dicts["Name"]

0       Tom
1      Greg
2    Simone
3     Paula
4      Yuri
Name: Name, dtype: object

In [41]:
df_list_dicts.loc[3]

Name                 Paula
Sales                  542
Performance    Outstanding
Name: 3, dtype: object

In [42]:
df_list_dicts

Unnamed: 0,Name,Sales,Performance
0,Tom,300,
1,Greg,345,
2,Simone,745,
3,Paula,542,Outstanding
4,Yuri,453,


In [43]:
df_list_dicts["Grades"] = ["A", "A", "B", "A", "C"]
df_list_dicts

Unnamed: 0,Name,Sales,Performance,Grades
0,Tom,300,,A
1,Greg,345,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [44]:
df_list_dicts.drop("Performance", axis=1)

Unnamed: 0,Name,Sales,Grades
0,Tom,300,A
1,Greg,345,A
2,Simone,745,B
3,Paula,542,A
4,Yuri,453,C


In [45]:
df_list_dicts.drop(1)

Unnamed: 0,Name,Sales,Performance,Grades
0,Tom,300,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [46]:
df_list_dicts

Unnamed: 0,Name,Sales,Performance,Grades
0,Tom,300,,A
1,Greg,345,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [47]:
df_list_dicts[["Name", "Sales", "Grades"]]

Unnamed: 0,Name,Sales,Grades
0,Tom,300,A
1,Greg,345,A
2,Simone,745,B
3,Paula,542,A
4,Yuri,453,C


In [51]:
east = pd.Series([1000,1200,3400],index=['Q1','Q2','Q3'])
west = pd.Series([1100,1300,2400,3500],index=['Q1','Q2','Q3','Q4'])
east

Q1    1000
Q2    1200
Q3    3400
dtype: int64

In [52]:
west

Q1    1100
Q2    1300
Q3    2400
Q4    3500
dtype: int64

In [53]:
df_region = pd.DataFrame({'East':east, 'West': west})
df_region

Unnamed: 0,East,West
Q1,1000.0,1100
Q2,1200.0,1300
Q3,3400.0,2400
Q4,,3500


In [55]:
df_region['North'] = [2000,3000,2500,4000]
df_region['South'] = [1500,2000,1500,4000]
df_region

Unnamed: 0,East,West,North,South
Q1,1000.0,1100,2000,1500
Q2,1200.0,1300,3000,2000
Q3,3400.0,2400,2500,1500
Q4,,3500,4000,4000


In [56]:
df_region['Years'] = ['2016', '2017', '2018', '2019']
df_region

Unnamed: 0,East,West,North,South,Years
Q1,1000.0,1100,2000,1500,2016
Q2,1200.0,1300,3000,2000,2017
Q3,3400.0,2400,2500,1500,2018
Q4,,3500,4000,4000,2019


In [59]:
df_region.set_index("Years")

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,1000.0,1100,2000,1500
2017,1200.0,1300,3000,2000
2018,3400.0,2400,2500,1500
2019,,3500,4000,4000


In [60]:
df_region.set_index("Years", inplace=True)

In [61]:
df_region

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,1000.0,1100,2000,1500
2017,1200.0,1300,3000,2000
2018,3400.0,2400,2500,1500
2019,,3500,4000,4000


In [62]:
new_region = df_region.reindex(["2018", "2019", "2020", "2021"])
new_region

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,


In [66]:
re_indexed = new_region.reindex(columns=["North", "South", "New"])
re_indexed

Unnamed: 0_level_0,North,South,New
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,2500.0,1500.0,
2019,4000.0,4000.0,
2020,,,
2021,,,


## Missing Data

In [67]:
re_indexed.fillna(0)

Unnamed: 0_level_0,North,South,New
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,2500.0,1500.0,0.0
2019,4000.0,4000.0,0.0
2020,0.0,0.0,0.0
2021,0.0,0.0,0.0


In [68]:
new_region

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,


In [69]:
new_region.fillna(method='ffill') # theres also 'bfill'

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,3400.0,3500.0,4000.0,4000.0
2020,3400.0,3500.0,4000.0,4000.0
2021,3400.0,3500.0,4000.0,4000.0


In [71]:
new_region.fillna(method='pad')

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,3400.0,3500.0,4000.0,4000.0
2020,3400.0,3500.0,4000.0,4000.0
2021,3400.0,3500.0,4000.0,4000.0


In [74]:
new_region.interpolate()

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,3400.0,3500.0,4000.0,4000.0
2020,3400.0,3500.0,4000.0,4000.0
2021,3400.0,3500.0,4000.0,4000.0


In [76]:
new_region.loc['2022'] = [6400, 7400, 5200, 800]
new_region

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,
2022,6400.0,7400.0,5200.0,800.0


In [82]:
new_region.interpolate()

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,4150.0,3500.0,4000.0,4000.0
2020,4900.0,4800.0,4400.0,2933.333333
2021,5650.0,6100.0,4800.0,1866.666667
2022,6400.0,7400.0,5200.0,800.0


In [84]:
new_region.interpolate().round(2)

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,4150.0,3500.0,4000.0,4000.0
2020,4900.0,4800.0,4400.0,2933.33
2021,5650.0,6100.0,4800.0,1866.67
2022,6400.0,7400.0,5200.0,800.0


In [80]:
new_region.mean()

East     4900.000000
West     4433.333333
North    3900.000000
South    2100.000000
dtype: float64

In [81]:
new_region.fillna(new_region.mean())

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,4900.0,3500.0,4000.0,4000.0
2020,4900.0,4433.333333,3900.0,2100.0
2021,4900.0,4433.333333,3900.0,2100.0
2022,6400.0,7400.0,5200.0,800.0


In [85]:
new_region

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,
2022,6400.0,7400.0,5200.0,800.0


In [86]:
new_region.dropna()

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2022,6400.0,7400.0,5200.0,800.0


In [88]:
new_region.dropna(axis=1, thresh=3) # 3 representing valid values

Unnamed: 0_level_0,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,2400.0,2500.0,1500.0
2019,3500.0,4000.0,4000.0
2020,,,
2021,,,
2022,7400.0,5200.0,800.0


In [89]:
new_region["na"] = np.nan
new_region

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,


In [90]:
new_region.dropna(axis=1, how="all")

Unnamed: 0_level_0,East,West,North,South
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,
2022,6400.0,7400.0,5200.0,800.0


In [91]:
new_region.dropna(axis=0, how="all")

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2022,6400.0,7400.0,5200.0,800.0,


In [92]:
new_region.drop(['2020', '2021'])

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2022,6400.0,7400.0,5200.0,800.0,


In [93]:
new_region


Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,


In [94]:
new_region.loc["2017"] = [3400, 2400, 2500, 1500, np.nan]
new_region

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [96]:
new_region.duplicated()

Years
2018    False
2019    False
2020    False
2021     True
2022    False
2017     True
dtype: bool

In [97]:
new_region.sort_index()

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017,3400.0,2400.0,2500.0,1500.0,
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,


In [98]:
new_region.sort_index(ascending=False) # sort descending

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022,6400.0,7400.0,5200.0,800.0,
2021,,,,,
2020,,,,,
2019,,3500.0,4000.0,4000.0,
2018,3400.0,2400.0,2500.0,1500.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [99]:
new_region.drop_duplicates()

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2022,6400.0,7400.0,5200.0,800.0,


In [100]:
new_region

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [101]:
new_region.West

Years
2018    2400.0
2019    3500.0
2020       NaN
2021       NaN
2022    7400.0
2017    2400.0
Name: West, dtype: float64

In [102]:
new_region["West"]

Years
2018    2400.0
2019    3500.0
2020       NaN
2021       NaN
2022    7400.0
2017    2400.0
Name: West, dtype: float64

In [103]:
new_region[["West", "East"]]

Unnamed: 0_level_0,West,East
Years,Unnamed: 1_level_1,Unnamed: 2_level_1
2018,2400.0,3400.0
2019,3500.0,
2020,,
2021,,
2022,7400.0,6400.0
2017,2400.0,3400.0


In [112]:
new_region.loc['2022']

East     6400.0
West     7400.0
North    5200.0
South     800.0
na          NaN
Name: 2022, dtype: float64

In [106]:
new_region.iloc[2] # integer loc

East    NaN
West    NaN
North   NaN
South   NaN
na      NaN
Name: 2020, dtype: float64

In [107]:
new_region

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [110]:
new_region.iloc[1, 2]

4000.0

In [111]:
new_region.iloc[1,2:]

North    4000.0
South    4000.0
na          NaN
Name: 2019, dtype: float64

In [116]:
new_region[new_region["North"]]

KeyError: "None of [Float64Index([2500.0, 4000.0, nan, nan, 5200.0, 2500.0], dtype='float64')] are in the [columns]"

In [120]:
new_region[new_region.North >= 4000]

Unnamed: 0_level_0,East,West,North,South,na
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019,,3500.0,4000.0,4000.0,
2022,6400.0,7400.0,5200.0,800.0,


In [122]:
new_region["East"] + new_region["South"]

Years
2018    4900.0
2019       NaN
2020       NaN
2021       NaN
2022    7200.0
2017    4900.0
dtype: float64

In [123]:
new_region["North"] + new_region["South"]

Years
2018    4000.0
2019    8000.0
2020       NaN
2021       NaN
2022    6000.0
2017    4000.0
dtype: float64

In [125]:
new_region["East"].add(new_region["South"], fill_value=0)

Years
2018    4900.0
2019    4000.0
2020       NaN
2021       NaN
2022    7200.0
2017    4900.0
dtype: float64

In [126]:
new_region["Total"] = new_region["East"].add(new_region["South"], fill_value=0).add(new_region["North"], fill_value=0).add(new_region["North"], fill_value=0)

In [127]:
new_region

Unnamed: 0_level_0,East,West,North,South,na,Total
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018,3400.0,2400.0,2500.0,1500.0,,9900.0
2019,,3500.0,4000.0,4000.0,,12000.0
2020,,,,,,
2021,,,,,,
2022,6400.0,7400.0,5200.0,800.0,,17600.0
2017,3400.0,2400.0,2500.0,1500.0,,9900.0


In [128]:
new_region["South"].rank(ascending=False)

Years
2018    2.5
2019    1.0
2020    NaN
2021    NaN
2022    4.0
2017    2.5
Name: South, dtype: float64

In [130]:
new_region["South"].rank(ascending=0)

Years
2018    2.5
2019    1.0
2020    NaN
2021    NaN
2022    4.0
2017    2.5
Name: South, dtype: float64

In [131]:
new_region.describe()

Unnamed: 0,East,West,North,South,na,Total
count,3.0,4.0,4.0,4.0,0.0,4.0
mean,4400.0,3925.0,3550.0,1950.0,,12350.0
std,1732.050808,2373.991014,1307.669683,1405.939781,,3637.306696
min,3400.0,2400.0,2500.0,800.0,,9900.0
25%,3400.0,2400.0,2500.0,1325.0,,9900.0
50%,3400.0,2950.0,3250.0,1500.0,,10950.0
75%,4900.0,4475.0,4300.0,2125.0,,13400.0
max,6400.0,7400.0,5200.0,4000.0,,17600.0


In [132]:
new_region.mean()

East      4400.0
West      3925.0
North     3550.0
South     1950.0
na           NaN
Total    12350.0
dtype: float64

In [133]:
new_region.sum()

East     13200.0
West     15700.0
North    14200.0
South     7800.0
na           0.0
Total    49400.0
dtype: float64

In [134]:
new_region.sum(axis=1)

Years
2018    19700.0
2019    23500.0
2020        0.0
2021        0.0
2022    37400.0
2017    19700.0
dtype: float64

In [139]:
new_region.sort_index().cumsum()

Unnamed: 0_level_0,East,West,North,South,na,Total
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017,3400.0,2400.0,2500.0,1500.0,,9900.0
2018,6800.0,4800.0,5000.0,3000.0,,19800.0
2019,,8300.0,9000.0,7000.0,,31800.0
2020,,,,,,
2021,,,,,,
2022,13200.0,15700.0,14200.0,7800.0,,49400.0


In [140]:
new_region.min()

East     3400.0
West     2400.0
North    2500.0
South     800.0
na          NaN
Total    9900.0
dtype: float64

In [141]:
new_region.std()

East     1732.050808
West     2373.991014
North    1307.669683
South    1405.939781
na               NaN
Total    3637.306696
dtype: float64