In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('sample_pivot.csv')
df.head()

Unnamed: 0,Date,Region,Type,Units,Sales
0,7/11/2020,East,Children's Clothing,18.0,306
1,9/23/2020,North,Children's Clothing,14.0,448
2,4/2/2020,South,Women's Clothing,17.0,425
3,2/28/2020,East,Children's Clothing,26.0,832
4,3/19/2020,West,Women's Clothing,3.0,33


In [3]:
df['Units'].fillna(np.mean(df['Units']), inplace=True)
df.isnull().sum()

Date      0
Region    0
Type      0
Units     0
Sales     0
dtype: int64

In [4]:
# conditional Selection
df[df['Region'] == 'South']

Unnamed: 0,Date,Region,Type,Units,Sales
2,4/2/2020,South,Women's Clothing,17.0,425
6,1/24/2020,South,Women's Clothing,12.0,396
10,4/16/2020,South,Women's Clothing,16.0,352
42,3/17/2020,South,Children's Clothing,33.0,924
46,7/30/2020,South,Children's Clothing,18.0,486
...,...,...,...,...,...
983,2/18/2020,South,Children's Clothing,21.0,462
987,4/23/2020,South,Women's Clothing,34.0,680
992,4/15/2020,South,Women's Clothing,25.0,750
997,8/31/2020,South,Men's Clothing,13.0,208


In [5]:
df[(df['Region'] == 'South') & (df['Sales'] >=800)]

Unnamed: 0,Date,Region,Type,Units,Sales
42,3/17/2020,South,Children's Clothing,33.0,924
71,11/4/2020,South,Men's Clothing,28.0,868
222,5/28/2020,South,Women's Clothing,13.0,1122
420,7/9/2020,South,Men's Clothing,15.0,924
463,9/11/2020,South,Children's Clothing,15.0,832
513,10/4/2020,South,Children's Clothing,33.0,891
557,9/4/2020,South,Men's Clothing,34.0,1085
614,2/24/2020,South,Men's Clothing,8.0,1056
653,12/19/2020,South,Men's Clothing,16.0,864
665,3/30/2020,South,Children's Clothing,16.0,896


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    1000 non-null   object 
 1   Region  1000 non-null   object 
 2   Type    1000 non-null   object 
 3   Units   1000 non-null   float64
 4   Sales   1000 non-null   int64  
dtypes: float64(1), int64(1), object(3)
memory usage: 39.2+ KB


In [7]:
# the date column is in object/string data type. you convert to date data type before selection.
df['Date'] = pd.to_datetime(df['Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    1000 non-null   datetime64[ns]
 1   Region  1000 non-null   object        
 2   Type    1000 non-null   object        
 3   Units   1000 non-null   float64       
 4   Sales   1000 non-null   int64         
dtypes: datetime64[ns](1), float64(1), int64(1), object(2)
memory usage: 39.2+ KB


In [8]:
df[(df['Date'] >= pd.Timestamp(2020,4,1)) & (df['Date'] <= pd.Timestamp(2020,5,31)) & (df['Region'] == 'East')]

Unnamed: 0,Date,Region,Type,Units,Sales
12,2020-05-01,East,Men's Clothing,10.0,140
44,2020-05-11,East,Men's Clothing,35.0,700
45,2020-04-29,East,Women's Clothing,14.0,462
86,2020-05-12,East,Women's Clothing,29.0,812
92,2020-04-14,East,Children's Clothing,12.0,264
...,...,...,...,...,...
901,2020-04-24,East,Men's Clothing,28.0,36
907,2020-05-16,East,Children's Clothing,12.0,120
937,2020-05-23,East,Men's Clothing,33.0,384
952,2020-04-30,East,Women's Clothing,7.0,459


In [9]:
# sorting of values
df.sort_values('Date', ignore_index=True, ascending=True, inplace=True) # default ascending=true. to set it at descending, you make ascending=false
df.head()  # NB you can use same to sort for the column data(Region,Type,Units,Sales)

Unnamed: 0,Date,Region,Type,Units,Sales
0,2020-01-01,East,Women's Clothing,12.0,322
1,2020-01-01,East,Men's Clothing,18.0,234
2,2020-01-02,East,Children's Clothing,18.0,204
3,2020-01-02,East,Women's Clothing,9.0,374
4,2020-01-03,North,Men's Clothing,19.638858,240


In [10]:
# Apply Method
df.apply('mean', numeric_only =True)

Units     19.638858
Sales    427.254000
dtype: float64

In [11]:
df.apply('max')

Date      2020-12-31 00:00:00
Region                   West
Type         Women's Clothing
Units                    35.0
Sales                    1155
dtype: object

In [12]:
df.apply('std', numeric_only=True)

Units      9.039574
Sales    253.441362
dtype: float64

In [13]:
df['Sales'].mean()

427.254

In [14]:
# group and apply aggregate functions
df.groupby('Type').apply('sum', numeric_only=True)

Unnamed: 0_level_0,Units,Sales
Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Children's Clothing,5887.0,121907
Men's Clothing,5721.858397,129279
Women's Clothing,8030.0,176068


In [15]:
# pivot table
df_pv = pd.pivot_table(df, index='Region', columns='Type', values=['Units','Sales'])
df_pv

Unnamed: 0_level_0,Sales,Sales,Sales,Units,Units,Units
Type,Children's Clothing,Men's Clothing,Women's Clothing,Children's Clothing,Men's Clothing,Women's Clothing
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
East,405.743363,423.647541,399.028409,20.513274,19.836066,19.159091
North,438.894118,449.157303,432.528169,20.741176,19.638858,18.28169
South,412.666667,475.435897,418.924528,22.6,18.589744,19.924528
West,480.52381,465.292683,419.188679,18.785714,20.219512,18.981132


In [16]:
df_pv = pd.pivot_table(df, index='Region', columns='Type', values=['Units','Sales'], aggfunc='count')
df_pv

Unnamed: 0_level_0,Sales,Sales,Sales,Units,Units,Units
Type,Children's Clothing,Men's Clothing,Women's Clothing,Children's Clothing,Men's Clothing,Women's Clothing
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
East,113,122,176,113,122,176
North,85,89,142,85,89,142
South,45,39,53,45,39,53
West,42,41,53,42,41,53


In [17]:
df_pv = pd.pivot_table(df, index='Region', columns='Type', values=['Units','Sales'], aggfunc='sum', margins=True, margins_name='Total')
df_pv

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Units,Units,Units,Units
Type,Children's Clothing,Men's Clothing,Women's Clothing,Total,Children's Clothing,Men's Clothing,Women's Clothing,Total
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
East,45849.0,51685.0,70229.0,167763,2318.0,2420.0,3372.0,8110.0
North,37306.0,39975.0,61419.0,138700,1763.0,1747.858397,2596.0,6106.858397
South,18570.0,18542.0,22203.0,59315,1017.0,725.0,1056.0,2798.0
West,20182.0,19077.0,22217.0,61476,789.0,829.0,1006.0,2624.0
Total,121907.0,129279.0,176068.0,427254,5887.0,5721.858397,8030.0,19638.858397


In [18]:
# Cross Tabulation: checks relationship or corelation between two categorical column
pd.crosstab(df['Region'], df['Type'], margins=True, margins_name='Total', values=df['Sales'], aggfunc='mean')

Type,Children's Clothing,Men's Clothing,Women's Clothing,Total
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
East,405.743363,423.647541,399.028409,408.182482
North,438.894118,449.157303,432.528169,438.924051
South,412.666667,475.435897,418.924528,432.956204
West,480.52381,465.292683,419.188679,452.029412
Total,427.74386,444.257732,415.254717,427.254


In [19]:
# web scrapping for html tables
import requests
url_link = 'https://finance.yahoo.com/markets/crypto/all/'
r = requests.get(url_link,headers ={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebkit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'})
read_html_pandas_data = pd.read_html(r.text)[0]
df2= pd.DataFrame(read_html_pandas_data)
df2

  read_html_pandas_data = pd.read_html(r.text)[0]


Unnamed: 0,Symbol,Price,Change,Change %,Day Chart,Market Cap,Volume,Volume In Currency (24hr),Total Volume All Currencies (24hr),Circulating Supply,52 Wk Change %,52 Wk Range
0,BTC-USD Bitcoin USD,"63,040.50 -244.46 (-0.39%)",-244.46,-0.39%,,1.245T,26.251B,26.251B,26.251B,19.757M,136.80%,
1,ETH-USD Ethereum USD,"2,549.28 +2.64 (+0.10%)",2.64,+0.10%,,306.798B,14.875B,14.875B,14.875B,120.347M,54.64%,
2,USDT-USD Tether USDt USD,1.0001 -0.0000 (-0.00%),-0.0,-0.00%,,119.179B,49.519B,49.519B,49.519B,119.166B,0.01%,
3,BNB-USD BNB USD,586.06 +14.58 (+2.55%),14.58,+2.55%,,85.525B,1.775B,1.775B,1.775B,145.934M,168.91%,
4,SOL-USD Solana USD,147.16 -3.69 (-2.45%),-3.69,-2.45%,,68.945B,2.311B,2.311B,2.311B,468.489M,633.48%,
5,USDC-USD USD Coin USD,1.0000 +0.0000 (+0.00%),0.0,+0.00%,,35.864B,4.671B,4.671B,4.671B,35.863B,-0.00%,
6,XRP-USD XRP USD,0.582881 -0.006638 (-1.13%),-0.006638,-1.13%,,32.895B,895.122M,895.122M,895.122M,56.435B,15.62%,
7,STETH-USD Lido Staked ETH USD,"2,549.57 +6.39 (+0.25%)",6.39,+0.25%,,24.859B,25.136M,25.136M,25.136M,9.75M,54.52%,
8,DOGE-USD Dogecoin USD,0.105985 -0.000524 (-0.49%),-0.000524,-0.49%,,15.479B,499.021M,499.021M,499.021M,146.046B,70.76%,
9,TON11419-USD Toncoin USD,5.5407 -0.1922 (-3.35%),-0.1922,-3.35%,,14.033B,240.363M,240.363M,240.363M,2.533B,147.07%,


In [20]:
rate = pd.read_csv('exchange20092024.csv')
rate.head()

Unnamed: 0,Rate Date,Currency,Rate Year,Rate Month,Buying Rate,Central Rate,Selling Rate
9/19/2024,CFA,2024,September,2.7819,2.7919,2.8019,
9/19/2024,YUAN/RENMINBI,2024,September,225.3505,225.4212,225.4919,
9/19/2024,DANISH KRONA,2024,September,237.4456,237.5201,237.5946,
9/19/2024,EURO,2024,September,1771.4938,1772.0498,1772.6058,
9/19/2024,YEN,2024,September,11.1232,11.1267,11.1302,


In [21]:
rate.reset_index(inplace=True)
rate.head()

Unnamed: 0,index,Rate Date,Currency,Rate Year,Rate Month,Buying Rate,Central Rate,Selling Rate
0,9/19/2024,CFA,2024,September,2.7819,2.7919,2.8019,
1,9/19/2024,YUAN/RENMINBI,2024,September,225.3505,225.4212,225.4919,
2,9/19/2024,DANISH KRONA,2024,September,237.4456,237.5201,237.5946,
3,9/19/2024,EURO,2024,September,1771.4938,1772.0498,1772.6058,
4,9/19/2024,YEN,2024,September,11.1232,11.1267,11.1302,


In [22]:
rate.columns

Index(['index', 'Rate Date', 'Currency', 'Rate Year', 'Rate Month',
       'Buying Rate', 'Central Rate', 'Selling Rate'],
      dtype='object')

In [23]:
rate.rename({'index':'Rate Date', 'Rate Date':'Currency', 'Currency':'Rate Year', 'Rate Year':'Rate Month', 'Rate Month':'Buying Rate',
'Buying Rate':'Central Rate', 'Central Rate':'Selling Rate', 'Selling Rate':'Nill'}, axis=1, inplace=True)
rate.head()

Unnamed: 0,Rate Date,Currency,Rate Year,Rate Month,Buying Rate,Central Rate,Selling Rate,Nill
0,9/19/2024,CFA,2024,September,2.7819,2.7919,2.8019,
1,9/19/2024,YUAN/RENMINBI,2024,September,225.3505,225.4212,225.4919,
2,9/19/2024,DANISH KRONA,2024,September,237.4456,237.5201,237.5946,
3,9/19/2024,EURO,2024,September,1771.4938,1772.0498,1772.6058,
4,9/19/2024,YEN,2024,September,11.1232,11.1267,11.1302,


In [24]:
rate.drop('Nill', axis=1, inplace=True)
rate.head()

Unnamed: 0,Rate Date,Currency,Rate Year,Rate Month,Buying Rate,Central Rate,Selling Rate
0,9/19/2024,CFA,2024,September,2.7819,2.7919,2.8019
1,9/19/2024,YUAN/RENMINBI,2024,September,225.3505,225.4212,225.4919
2,9/19/2024,DANISH KRONA,2024,September,237.4456,237.5201,237.5946
3,9/19/2024,EURO,2024,September,1771.4938,1772.0498,1772.6058
4,9/19/2024,YEN,2024,September,11.1232,11.1267,11.1302


In [25]:
rate.isnull().sum()

Rate Date       0
Currency        0
Rate Year       0
Rate Month      0
Buying Rate     0
Central Rate    0
Selling Rate    0
dtype: int64

In [26]:
rate['Currency'].duplicated()

0        False
1        False
2        False
3        False
4        False
         ...  
56063     True
56064     True
56065     True
56066     True
56067     True
Name: Currency, Length: 56068, dtype: bool

In [27]:
rate['Currency'].unique()

array(['CFA', 'YUAN/RENMINBI', 'DANISH KRONA', 'EURO', 'YEN', 'RIYAL',
       'SOUTH AFRICAN RAND', 'SDR', 'SWISS FRANC', 'POUNDS STERLING',
       'US DOLLAR', 'WAUA', 'POUND STERLING', 'DANISH KRONER',
       'JAPANESE YEN', 'US DOLLAR ', 'YEN ', 'CFA ', 'WAUA ', 'SDR ',
       'EURO\t', 'EURO ', 'RIYAL ', 'SWISS FRANC\t', 'SDR\t', 'NAIRA',
       'POESO'], dtype=object)

In [33]:
rate['Currency'].nunique

<bound method IndexOpsMixin.nunique of 0                  CFA
1        YUAN/RENMINBI
2         DANISH KRONA
3                 EURO
4                  YEN
             ...      
56063              CFA
56064             EURO
56065             WAUA
56066        US DOLLAR
56067              YEN
Name: Currency, Length: 56068, dtype: object>

In [38]:
rate.replace({'POUND STERLING':'POUNDS STERLING','DANISH KRONER':'DANISH KRONA','JAPANESE YEN':'YEN', 'US DOLLAR ':'US DOLLAR', 'YEN ':'YEN',
'CFA ':'CFA', 'WAUA ':'WAUA','SDR ':'SDR','EURO\t':'EURO', 'EURO ':'EURO', 'RIYAL ':'RIYAL', 'SWISS FRANC\t':'SWISS FRANC',
'SDR\t':'SDR'}, inplace=True)
rate['Currency'].unique()

array(['CFA', 'YUAN/RENMINBI', 'DANISH KRONA', 'EURO', 'YEN', 'RIYAL',
       'SOUTH AFRICAN RAND', 'SDR', 'SWISS FRANC', 'POUNDS STERLING',
       'US DOLLAR', 'WAUA', 'NAIRA', 'POESO'], dtype=object)

In [39]:
rate.to_csv('Exchange_Rate.csv', index=False) 

In [40]:
rate.to_excel('Exchange_Rate.xlsx', sheet_name='RATE',index=False)

In [49]:
# saving as an Excel file with multiple sheets on one file.
with pd.ExcelWriter('Exchange.xlsx', engine='xlsxwriter') as f:
    rate[rate['Currency'] =='US DOLLAR'].to_excel(f,sheet_name='USD',engine='xlsxwriter',index=False)
    rate[rate['Currency'] =='POUNDS STERLING'].to_excel(f,sheet_name='POUNDS',engine='xlsxwriter',index=False)
    rate[rate['Currency'] =='EURO'].to_excel(f,sheet_name='EURO',engine='xlsxwriter',index=False)
    rate[rate['Currency'] =='YEN'].to_excel(f,sheet_name='YEN',engine='xlsxwriter',index=False)
    rate[rate['Currency'] =='NAIRA'].to_excel(f,sheet_name='NAIRA',engine='xlsxwriter',index=False)    