In [1]:
import pandas as pd
import numpy as np
import requests
from io import StringIO

In [2]:
orig_url = 'https://drive.google.com/file/d/1uXK-rG07T1njpXvTNxNZcmJrVJQmfeRc/view?usp=sharing'
file_id = orig_url.split('/')[-2]
dwn_url = 'https://drive.google.com/uc?export=download&id=' + file_id

purchased = pd.read_excel(dwn_url, skiprows=1, names=['Desc', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
                                                      'Growth ratio'])
purchased

Unnamed: 0,Desc,2011,2012,2013,2014,2015,2016,2017,2018,Growth ratio
0,Aantal (x 1000),1198,1035,1008,1051,983,928,957,1011,0.06
1,Waarde (x € 1000),879240,769159,797264,886605,899020,936829,976034,1220291,0.25


In [3]:
#Overview
print(purchased.head())
print(purchased.shape)
# purchased.columns() # It looks like there is no such method for data frames.

                Desc    2011    2012    2013    2014    2015    2016    2017  \
0    Aantal (x 1000)    1198    1035    1008    1051     983     928     957   
1  Waarde (x € 1000)  879240  769159  797264  886605  899020  936829  976034   

      2018  Growth ratio  
0     1011          0.06  
1  1220291          0.25  
(2, 10)


In [4]:
#Checking the types
purchased.dtypes

Desc             object
2011              int64
2012              int64
2013              int64
2014              int64
2015              int64
2016              int64
2017              int64
2018              int64
Growth ratio    float64
dtype: object

In [5]:
#Adding the missing column. To be able to extrapolate the missing values later
purchased['2019']=""

In [6]:
# Calculating 2019 quantity based on growth ratio
purchased['2019'] = round(purchased['2018']*(1+purchased['Growth ratio']), 0).astype(int)
purchased

Unnamed: 0,Desc,2011,2012,2013,2014,2015,2016,2017,2018,Growth ratio,2019
0,Aantal (x 1000),1198,1035,1008,1051,983,928,957,1011,0.06,1072
1,Waarde (x € 1000),879240,769159,797264,886605,899020,936829,976034,1220291,0.25,1525364


In [7]:
#Taking out the columns and the row that we won't be needing
total_purchased = purchased.drop(['2011', '2012', '2013', '2014', '2015', 'Growth ratio'],axis=1)
total_purchased = total_purchased.loc[total_purchased['Desc'] == 'Aantal (x 1000)']

In [8]:
total_purchased['Desc'] = 'Purchased bikes NL'

In [9]:
total_purchased

Unnamed: 0,Desc,2016,2017,2018,2019
0,Purchased bikes NL,928,957,1011,1072


In [10]:
#Displaying the correct unit to the Total_purchased df
total_purchased = total_purchased[['2016', '2017', '2018', '2019']].apply(lambda x: x*1000)
total_purchased.insert(0, 'Desc', 'Purchased bikes')


In [11]:
total_purchased 

Unnamed: 0,Desc,2016,2017,2018,2019
0,Purchased bikes,928000,957000,1011000,1072000


In [12]:
#Importing a separate table to be added to the already inserted 'purchase' one:

url_1 = 'https://drive.google.com/file/d/1y1nWsBv4RHFIGLqAcgH84Iqq1a2rEWW-/view?usp=sharing'
file_id = url_1.split('/')[-2]
url = 'https://drive.google.com/uc?export=download&id=' + file_id

purchased_ams = pd.read_excel(url, skiprows=1, nrows=6, names=['Desc', '2014', '2015', '2016','2017','2018','2019','tbd'])
purchased_ams 

Unnamed: 0,Desc,2014,2015,2016,2017,2018,2019,tbd
0,purchased NL x1000,1051.0,983.0,928.0,957.0,1011.0,1071.66,
1,purchased AMS x1000,163.11487,156.000497,92.802719,89.994155,86.224719,80.257868,
2,Amsterdam,11087.0,12441.0,10239.0,9364.0,8671.0,9393.0,
3,Nederland,71437.0,78394.0,102387.0,99577.0,101669.0,125422.0,
4,fetched out bikes_AVG NL,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,
5,fetched out bikes_AVG AMS,6000.0,6000.0,6000.0,6000.0,6000.0,6000.0,


In [13]:
# dropping unnecessary columns and rows:
purchased_ams = purchased_ams.drop(['2014','2015','tbd'], axis =1)
purchased_ams = purchased_ams.drop([0,2,3], axis =0)
purchased_ams

Unnamed: 0,Desc,2016,2017,2018,2019
1,purchased AMS x1000,92.802719,89.994155,86.224719,80.257868
4,fetched out bikes_AVG NL,15000.0,15000.0,15000.0,15000.0
5,fetched out bikes_AVG AMS,6000.0,6000.0,6000.0,6000.0


In [14]:
#changing name in row to meaningful name
purchased_ams['Desc'][1]='Purchased bikes AMS'
purchased_ams

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  purchased_ams['Desc'][1]='Purchased bikes AMS'


Unnamed: 0,Desc,2016,2017,2018,2019
1,Purchased bikes AMS,92.802719,89.994155,86.224719,80.257868
4,fetched out bikes_AVG NL,15000.0,15000.0,15000.0,15000.0
5,fetched out bikes_AVG AMS,6000.0,6000.0,6000.0,6000.0


In [15]:
#concatenating the tables
frames = [total_purchased,purchased_ams]
total_purchased = pd.concat(frames)

In [16]:
total_purchased

Unnamed: 0,Desc,2016,2017,2018,2019
0,Purchased bikes,928000.0,957000.0,1011000.0,1072000.0
1,Purchased bikes AMS,92.802719,89.994155,86.22472,80.25787
4,fetched out bikes_AVG NL,15000.0,15000.0,15000.0,15000.0
5,fetched out bikes_AVG AMS,6000.0,6000.0,6000.0,6000.0


In [17]:
#Displaying correct unit in row 2:
#First i transpose the table, not to affect the rest of rows
total_purchased.transpose()

Unnamed: 0,0,1,4,5
Desc,Purchased bikes,Purchased bikes AMS,fetched out bikes_AVG NL,fetched out bikes_AVG AMS
2016,928000,92.8027,15000,6000
2017,957000,89.9942,15000,6000
2018,1.011e+06,86.2247,15000,6000
2019,1.072e+06,80.2579,15000,6000


In [21]:
#Applying the formula for the concerned column
total_purchased.iloc[1] = total_purchased.iloc[1].apply(lambda x: x*1000)

#Note: This actually affected the Desc. at index 1

In [23]:
total_purchased

Unnamed: 0,Desc,2016,2017,2018,2019
0,Purchased bikes,928000.0,957000.0,1011000.0,1072000.0
1,Purchased bikes AMSPurchased bikes AMSPurchase...,92802.7191,89994.15528,86224.72,80257.87
4,fetched out bikes_AVG NL,15000.0,15000.0,15000.0,15000.0
5,fetched out bikes_AVG AMS,6000.0,6000.0,6000.0,6000.0


In [26]:
#Renaming Description at row index 1
total_purchased['Desc'][1] = 'Purchased bikes AMS'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_purchased['Desc'][1] = 'Purchased bikes AMS'


In [13]:
total_purchased

Unnamed: 0,Desc,2016,2017,2018,2019
0,Purchased bikes,928000,957000,1011000,1072000


In [32]:
convert_dict = {'Desc': object,
                '2016': int, 
                '2017': int,
                '2018': int,
                '2019': int,
               } 
  
total_purchased = total_purchased.astype(convert_dict) 

In [33]:
total_purchased

Unnamed: 0,Desc,2016,2017,2018,2019
0,Purchased bikes,928000,957000,1011000,1072000
1,Purchased bikes AMS,92802,89994,86224,80257
4,fetched out bikes_AVG NL,15000,15000,15000,15000
5,fetched out bikes_AVG AMS,6000,6000,6000,6000


In [None]:
## Displaying correct unit in row 2:

# First I transpose the table, not to affect the rest of rows
# total_purchased.transpose()
# #Applying the formula for the concerned column
# total_purchased.iloc[1] = total_purchased.iloc[1].apply(lambda x: x*1000)

##Note: This actually affected the Desc. at index 1

# total_purchased
#Renaming Description at row index 1
# total_purchased['Desc'][1] = 'Purchased bikes AMS'
# total_purchased['']



In [32]:
# The data type is to be changed (for diplaying reasons)

convert_dict = {'Desc': object,
                '2016': int, 
                '2017': int,
                '2018': int,
                '2019': int,
               } 
  
total_purchased = total_purchased.astype(convert_dict) 

In [33]:
total_purchased

Unnamed: 0,Desc,2016,2017,2018,2019
0,Purchased bikes,928000,957000,1011000,1072000
0,Purchased bikes AMS,45020,46682,49535,52726
0,Fetched bikes AMS,6000,6000,6000,6000
0,Fetched bikes NL,15000,15000,15000,15000
