# Correlation Plot

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import io

In [None]:
# Upload files through google.colab; the returned object is kept in `myfile`
# and later cells index it by file name to get each file's raw bytes.
from google.colab import files
myfile = files.upload()

In [None]:
# Wrap the uploaded bytes of 'example-correlation.csv' in an in-memory buffer
# and parse them into a DataFrame.
example_data = pd.read_csv(io.BytesIO(myfile['example-correlation.csv']))
# Bare expression as the last line of the cell so the notebook displays it.
example_data

In [None]:
# Pairwise correlation matrix of the example columns (DataFrame.corr with its
# default arguments), displayed as the cell output.
exam_corr = example_data.corr()
exam_corr

In [None]:
# Draw the correlation matrix as a heatmap; annot=True writes each value
# inside its cell.
sns.heatmap(exam_corr, annot=True)

# Variables in Heart Failure Prediction data
#### - age: Age of the patient (years)
#### - anaemia: Decrease of red blood cells or hemoglobin (boolean)
#### - creatinine_phosphokinase: Level of the CPK enzyme in the blood (mcg/L)
#### - diabetes: If the patient has diabetes (boolean)
#### - ejection_fraction: Percentage of blood leaving the heart at each contraction (percentage)
#### - high_blood_pressure: If the patient has hypertension (boolean)
#### - platelets: Platelets in the blood (kiloplatelets/mL)
#### - serum_creatinine: Level of serum creatinine in the blood (mg/dL)
#### - serum_sodium: Level of serum sodium in the blood (mEq/L)

In [None]:
# Parse the heart-failure CSV from the uploaded files; this expects the file
# to be among those stored in `myfile`.
heart_data = pd.read_csv(io.BytesIO(myfile['heart_failure_clinical_records_dataset.csv']))
# Preview the first five rows.
heart_data.head(5)

In [None]:
# Set the default figure size through rcParams; the setting stays in effect
# for figures created afterwards, not only for this cell.
plt.rcParams['figure.figsize'] = (12,8)
# Annotated correlation heatmap of the heart-failure columns, 'YlGnBu' colormap.
sns.heatmap(heart_data.corr(), annot=True, cmap='YlGnBu')
plt.show()

In [None]:
# Same default figure size as set for the heatmap cell.
# NOTE(review): seaborn.pairplot sizes its own grid through its height/aspect
# arguments, so this rcParams value likely has no effect here — confirm.
plt.rcParams['figure.figsize'] = (12,8)
# Grid of pairwise plots across the columns of heart_data.
sns.pairplot(heart_data)
plt.show()

# Autocorrelation plot

In [None]:
# lag=1: x1_1 holds x1 shifted down by one row, so each row carries the
# previous row's x1 value (the first row has no predecessor and becomes NaN).
example_data['x1_1'] = example_data['x1'].shift(1)

In [None]:
# Preview the first five rows, now including the lagged column x1_1.
example_data.head(5)

In [None]:
# Lag-1 scatter: each x1 value (horizontal) against the value one row
# earlier, x1_1 (vertical).
plt.scatter(x=example_data['x1'], y=example_data['x1_1'])
plt.ylabel('x1_1', fontsize=14)
plt.xlabel('x1', fontsize=14)
plt.title('Scatter plot x1 & x1_1', fontsize=14)
plt.show()

In [None]:
# lag=2: x1_2 holds x1 shifted down by two rows, so each row carries the x1
# value from two rows earlier (the first two rows become NaN).
example_data['x1_2'] = example_data['x1'].shift(2)
# Preview the first five rows with the new lagged column.
example_data.head(5)

In [None]:
# Lag-2 scatter: each x1 value (horizontal) against the value two rows
# earlier, x1_2 (vertical).
plt.scatter(x=example_data['x1'], y=example_data['x1_2'])
plt.ylabel('x1_2', fontsize=14)
plt.xlabel('x1', fontsize=14)
plt.title('Scatter plot x1 & x1_2', fontsize=14)
plt.show()

# Durbin-Watson Test

In [None]:
# Install statsmodels into the notebook environment.
# NOTE(review): a bare `pip ...` line is a notebook command, not Python; it
# presumably relies on IPython treating it as the %pip magic — confirm.
pip install statsmodels

In [None]:
from statsmodels.formula.api import ols
import seaborn as sns  # repeats the seaborn import from the first cell
# Parse the uploaded 'linear-trend-data.csv' bytes into a DataFrame.
linear_data = pd.read_csv(io.BytesIO(myfile['linear-trend-data.csv']))
# Fit the formula 'Sales ~ Time' (Sales regressed on Time) with statsmodels' ols.
linear_model = ols('Sales ~ Time', data=linear_data).fit()
# Display the fitted model's summary as the cell output.
linear_model.summary()

In [None]:
# Model predictions for the same Time values the model was fitted on.
linear_predict = linear_model.predict(linear_data['Time'])
# Residual = observed Sales minus the model's prediction.
linear_data['residual'] = linear_data['Sales'] - linear_predict
linear_data.head(5)

In [None]:
# residual_1 is the residual shifted down one row, i.e. the previous row's
# residual; the first row has no predecessor and becomes NaN.
linear_data['residual_1'] = linear_data['residual'].shift(1)
linear_data.head(5)

In [None]:
# Replace every NaN in the frame with 0, including the first residual_1 value
# left empty by the shift.
# NOTE(review): with that 0 in place, a sum of squared (residual - residual_1)
# differences picks up an extra residual[0]**2 term from the first row —
# confirm this is intended for the statistic computed from these columns.
linear_data = linear_data.fillna(0)
linear_data.head(5)

In [None]:
def dubin_waston(resid, resid_1):
    """Return the Durbin-Watson statistic for a residual series and its lag.

    Computes sum((resid - resid_1)**2) / sum(resid**2), pairing values by
    position.

    Args:
        resid: Residuals e_t (array-like of numbers).
        resid_1: Lag-1 residuals e_{t-1}, the same length as ``resid``.
            Entries that are NaN (such as the first value produced by
            ``Series.shift(1)``) are skipped in the numerator.

    Returns:
        The statistic as a float. It is NaN if ``resid`` itself contains NaN.

    Note:
        If the missing first lag is filled with 0 instead of being left as
        NaN, the numerator gains an extra ``resid[0]**2`` term.
    """
    current = np.asarray(resid, dtype=float)
    lagged = np.asarray(resid_1, dtype=float)
    squared_diff = (current - lagged) ** 2
    # nansum drops pairs whose lag is missing; a NaN in `current` still
    # reaches the denominator and makes the whole result NaN.
    return np.nansum(squared_diff) / np.sum(current ** 2)

In [None]:
# Compute the statistic from the residual column and its lag-1 column.
dw_value = dubin_waston(linear_data['residual'], linear_data['residual_1'])
# Report the value rounded to three decimals.
print(f'Dubin watson statistic is {dw_value:.3f}')

# Transforming time-series data

In [None]:
# Read the uploaded airline CSV, then give its second column the short name
# 'passengers' so later cells can refer to it directly.
airline_data = pd.read_csv(
    io.BytesIO(myfile['international-airline-passengers.csv'])
)
airline_data = airline_data.rename(
    columns={airline_data.columns[1]: 'passengers'}
)

In [None]:
# Line plot of the untransformed passenger counts against the row index,
# which the x-axis labels as 'Month'.
fig = plt.figure(figsize=(12, 8))
plt.plot(airline_data['passengers'])
plt.xlabel('Month', fontsize=12)
plt.ylabel('Passengers', fontsize=12)
plt.title('International airline passengers', fontsize=12)
plt.show()

In [None]:
# Square-root transform of the passenger counts. The column is named
# 'squared_passengers' although it stores the square root, not the square.
airline_data['squared_passengers'] = np.sqrt(airline_data['passengers'])

fig = plt.figure(figsize=(12,8))
plt.plot(airline_data['squared_passengers'])
# Title and y-label name the transform so the figure is not read as raw counts.
plt.title('International airline passengers (square-root transformed)', fontsize=12)
plt.xlabel('Month', fontsize=12)
plt.ylabel('sqrt(Passengers)', fontsize=12)

plt.show()

In [None]:
# Natural-log transform of the passenger counts.
airline_data['log_passengers'] = np.log(airline_data['passengers'])

fig = plt.figure(figsize=(12,8))
plt.plot(airline_data['log_passengers'])
# Title and y-label name the transform so the figure is not read as raw counts.
plt.title('International airline passengers (log transformed)', fontsize=12)
plt.xlabel('Month', fontsize=12)
plt.ylabel('log(Passengers)', fontsize=12)

plt.show()

In [None]:
# Second natural log, applied to the already log-transformed column
# 'log_passengers'.
airline_data['2log_passengers'] = np.log(airline_data['log_passengers'])

fig = plt.figure(figsize=(12,8))
plt.plot(airline_data['2log_passengers'])
# Title and y-label name the transform so the figure is not read as raw counts.
plt.title('International airline passengers (log-log transformed)', fontsize=12)
plt.xlabel('Month', fontsize=12)
plt.ylabel('log(log(Passengers))', fontsize=12)

plt.show()