# TINO IQ ASSIGNMENT

### <span style="color:orange">AUTHOR:  <br><br>**Manasi Chhibber**</span>

### <span style="color:orange">AIM:  <br><br>To predict the value of the target variable for the next 6 quarters using the PyCaret package.</span>

### <span style="color:orange">THEORY:  <br><br>PyCaret is an open-source ML library that simplifies the workflow from preparing the data to deploying the model. Because the 'Target' attribute is continuous, we'll use PyCaret's regression module.</span>

### Importing Libraries

In [1]:
import pandas as pd                   # for reading the csv file
from pycaret.regression import *      # for setting up a regression model
import matplotlib.pyplot as plt       # for plotting graphs

### Getting our Data

In [2]:
# Load the quarterly FRED extract into a DataFrame and show it as the cell output.
df = pd.read_csv(filepath_or_buffer = 'fred_quarterly.csv')
df

Unnamed: 0,date,month,quarter,Target,gnp,gdpdef,gdp
0,1/1/2000,2000-01,2000Q1,12935.252,10035.263,77.325,10002.179
1,4/1/2000,2000-04,2000Q2,13170.749,10283.391,77.807,10247.72
2,7/1/2000,2000-07,2000Q3,13183.89,10348.645,78.263,10318.165
3,10/1/2000,2000-10,2000Q4,13262.25,10490.43,78.688,10435.744
4,1/1/2001,2001-01,2001Q1,13219.251,10510.297,79.204,10470.231
5,4/1/2001,2001-04,2001Q2,13301.394,10647.427,79.683,10599.0
6,7/1/2001,2001-07,2001Q3,13248.142,10623.039,79.996,10598.02
7,10/1/2001,2001-10,2001Q4,13284.881,10748.408,80.245,10660.465
8,1/1/2002,2002-01,2002Q1,13394.91,10833.143,80.504,10783.5
9,4/1/2002,2002-04,2002Q2,13477.356,10918.384,80.783,10887.46


### <span style="color:orange">PyCaret can handle data preprocessing on its own, so we can set up the model directly and see which preprocessing steps it applies.</span>

### Model Setup

In [3]:
# Configure the PyCaret regression experiment on the loaded frame.
# session_id fixes the experiment's random seed so that Restart & Run All
# repeats the same run; 1148 is the session_id shown in the recorded setup
# summary, which PyCaret had picked at random when no seed was given.
# NOTE(review): 'quarter', 'date' and 'month' look like per-row period labels
# and are declared categorical here -- confirm this is intended, because the
# future quarters to be predicted are levels the model never saw in training.
model = setup(
    data = df,
    target = 'Target',
    session_id = 1148,
    numeric_imputation = 'mean',     # fill missing numeric values with the column mean
    numeric_features = ['gnp', 'gdpdef', 'gdp'],
    categorical_features = ['quarter', 'date', 'month'],
    normalize = True,
    silent = True,                   # skip the interactive dtype confirmation prompt
)

Unnamed: 0,Description,Value
0,session_id,1148
1,Target,Target
2,Original Data,"(86, 7)"
3,Missing Values,True
4,Numeric Features,3
5,Categorical Features,3
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(60, 49)"


### <span style="color:orange">As can be seen here, missing values were imputed, the data was normalized, and perfectly collinear attributes were removed.</span>

### Model Comparison

In [4]:
# Cross-validate the available regressors and rank them by R2 (PyCaret's
# default ordering for regression); the top-ranked estimator is returned and
# displayed as the cell output.
compare_models(sort = 'R2')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
omp,Orthogonal Matching Pursuit,128.3151,31066.491,156.2661,0.9871,0.0097,0.0081,0.006
llar,Lasso Least Angle Regression,138.8386,30482.2283,160.0857,0.987,0.0101,0.0088,0.006
br,Bayesian Ridge,176.0479,75529.8862,220.3959,0.976,0.0133,0.0108,0.006
et,Extra Trees Regressor,121.4237,85454.6518,197.5403,0.9759,0.0113,0.0071,0.046
ridge,Ridge Regression,190.3951,72694.0943,230.9026,0.9751,0.0138,0.0117,0.005
huber,Huber Regressor,158.4873,85350.2573,218.4328,0.9739,0.0129,0.0096,0.012
catboost,CatBoost Regressor,140.1238,110103.1861,221.2117,0.9698,0.0128,0.0083,0.446
rf,Random Forest Regressor,173.3358,102858.7471,247.4587,0.9692,0.0144,0.0103,0.052
gbr,Gradient Boosting Regressor,181.1309,109318.1549,255.8311,0.9665,0.0149,0.0108,0.013
ada,AdaBoost Regressor,201.5824,109302.79,271.3463,0.9633,0.016,0.0121,0.019


OrthogonalMatchingPursuit(fit_intercept=True, n_nonzero_coefs=None,
                          normalize=True, precompute='auto', tol=None)

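### <span style="color:orange">Orthogonal Matching Pursuit ranks first by R2 (0.9871), so let's go ahead with it. Note that it is not the best on every metric: Extra Trees has a lower MAE and Lasso Least Angle Regression a lower MSE.</span>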

### Model Building

In [5]:
# Train the Orthogonal Matching Pursuit regressor selected above; the table
# below lists its score on each cross-validation fold.
omp = create_model(estimator = 'omp')

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,164.6302,32409.7846,180.0272,0.963,0.0118,0.0109
1,268.1597,132841.7098,364.4746,0.9658,0.0209,0.0162
2,159.9927,36011.1166,189.766,0.9897,0.0115,0.0099
3,62.4017,6424.245,80.1514,0.9968,0.0051,0.0039
4,113.0984,14161.9949,119.0042,0.9926,0.0083,0.0078
5,124.3195,20549.1369,143.3497,0.9837,0.0095,0.008
6,84.8454,10060.5156,100.3021,0.9956,0.0063,0.0052
7,46.1191,4073.7635,63.826,0.9991,0.0043,0.003
8,107.9393,15994.7673,126.4704,0.9937,0.0084,0.0071
9,151.6447,38137.8754,195.2892,0.9911,0.0114,0.0091


In [6]:
# Run PyCaret's hyperparameter search on the baseline OMP model; the table
# below lists the tuned model's score on each cross-validation fold.
tuned_omp = tune_model(estimator = omp)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,163.8629,32974.9918,181.5902,0.9623,0.0119,0.0108
1,266.5582,132289.5711,363.7163,0.966,0.0208,0.0161
2,159.5893,36069.6459,189.9201,0.9896,0.0115,0.0098
3,62.8058,6504.4893,80.6504,0.9968,0.0051,0.0039
4,106.8632,12782.4453,113.0595,0.9934,0.0078,0.0074
5,124.8663,21244.4944,145.7549,0.9831,0.0097,0.0081
6,88.8889,10834.4656,104.0887,0.9953,0.0066,0.0055
7,44.796,3713.738,60.9404,0.9992,0.0041,0.0029
8,107.6284,16385.8927,128.0074,0.9936,0.0085,0.0071
9,150.9969,38628.0621,196.5402,0.991,0.0115,0.009


In [7]:
# our model is ready: tuned_omp is the estimator used for the predictions below

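### <span style="color:orange">With the tuned Orthogonal Matching Pursuit model, R2 averages about 0.987 across the 10 cross-validation folds and peaks at 0.9992 in the best fold. (R2 is the share of variance explained by the model, not a classification accuracy.)</span>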

### Making Predictions

In [8]:
# Before predicting, build a hand-entered sample set for the next 6 quarters.
# Every row follows this column order (the training columns minus 'Target').
columns = ['date', 'month', 'quarter', 'gnp', 'gdpdef', 'gdp']

# One row per quarter, 2021Q3 through 2022Q4.
data = [
    ['7/1/2021',  '2021-07', '2021Q3', 23768.420, 118.575, 23498.231],  # quarter-1
    ['10/1/2021', '2021-10', '2021Q4', 22832.897, 116.235, 22143.997],  # quarter-2
    ['1/1/2022',  '2022-01', '2022Q1', 21435.112, 115.789, 18765.309],  # quarter-3
    ['4/1/2022',  '2022-04', '2022Q2', 24310.577, 119.690, 20320.005],  # quarter-4
    ['7/1/2022',  '2022-07', '2022Q3', 23994.312, 117.719, 21845.269],  # quarter-5
    ['10/1/2022', '2022-10', '2022Q4', 22984.351, 118.748, 19118.800],  # quarter-6
]

# Wrap the rows in a DataFrame and display it as the cell output.
test = pd.DataFrame(data = data, columns = columns)
test

Unnamed: 0,date,month,quarter,gnp,gdpdef,gdp
0,7/1/2021,2021-07,2021Q3,23768.42,118.575,23498.231
1,10/1/2021,2021-10,2021Q4,22832.897,116.235,22143.997
2,1/1/2022,2022-01,2022Q1,21435.112,115.789,18765.309
3,4/1/2022,2022-04,2022Q2,24310.577,119.69,20320.005
4,7/1/2022,2022-07,2022Q3,23994.312,117.719,21845.269
5,10/1/2022,2022-10,2022Q4,22984.351,118.748,19118.8


In [9]:
# Score the sample set with the tuned model. The prediction comes back in a
# 'Label' column appended to the input columns; rename it to 'Target' so the
# forecast carries the same name as the training target.
# NOTE(review): the setup summary shows a 60-row train set out of 86 rows, so
# tuned_omp has not been refit on all the data -- consider finalize_model first.
predictions = predict_model(tuned_omp, data = test).rename(columns = {'Label': 'Target'})
predictions

Unnamed: 0,date,month,quarter,gnp,gdpdef,gdp,Target
0,7/1/2021,2021-07,2021Q3,23768.42,118.575,23498.231,20127.235624
1,10/1/2021,2021-10,2021Q4,22832.897,116.235,22143.997,19420.928335
2,1/1/2022,2022-01,2022Q1,21435.112,115.789,18765.309,17658.758125
3,4/1/2022,2022-04,2022Q2,24310.577,119.69,20320.005,18469.617624
4,7/1/2022,2022-07,2022Q3,23994.312,117.719,21845.269,19265.126036
5,10/1/2022,2022-10,2022Q4,22984.351,118.748,19118.8,17843.123543


### <span style="color:orange">The 'Target' attribute shows the final predictions.</span>