In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sb 

from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression 
from sklearn.svm import SVC 
from xgboost import XGBClassifier 
from sklearn import metrics 


# Suppress every warning for the rest of the session. This also hides
# deprecation and convergence warnings raised by the libraries used below.
import warnings 
warnings.filterwarnings('ignore')


In [None]:
# Read the price history from 'TSLA.csv' (path is relative to the working
# directory) and preview the first rows.
df = pd.read_csv('TSLA.csv') 
df.head()


In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [None]:
# Line chart of the 'Close' column, drawn against the DataFrame index.
close_prices = df['Close']

plt.figure(figsize=(15, 5))
plt.plot(close_prices)
plt.ylabel('Price in dollars.')
plt.title('Tesla Close price.', fontsize=15)
plt.show()


In [None]:
# Preview the first rows again (same call as right after loading).
df.head()


In [None]:
# Shape of the subset where 'Close' equals 'Adj Close'. If the row count
# matches df.shape, the two columns carry the same values.
same_price = df['Close'].eq(df['Adj Close'])
df.loc[same_price].shape


In [None]:
# Remove 'Adj Close' when it exists. errors='ignore' makes the drop a no-op
# if the column is already gone, so the cell can be re-run safely.
df = df.drop(columns=['Adj Close'], errors='ignore')

In [None]:
# Number of missing values in each column.
df.isnull().sum()


In [None]:
# Raw price/volume columns to inspect; this list is reused by the box-plot cell.
features = ['Open', 'High', 'Low', 'Close', 'Volume']

# One histogram (with a KDE overlay) per column, laid out on a 2x3 grid.
plt.figure(figsize=(20, 10))
for panel, column in enumerate(features, start=1):
    plt.subplot(2, 3, panel)
    sb.histplot(df[column], kde=True)
plt.show()


In [None]:
# One box plot per column named in `features`, laid out on a 2x3 grid.
plt.figure(figsize=(20, 10))
for panel, column in enumerate(features, start=1):
    plt.subplot(2, 3, panel)
    sb.boxplot(df[column])
plt.show()

In [None]:
# Split the 'Date' text on '-' and store the three pieces, as integers, in
# 'year', 'month' and 'day' (in that order).
# NOTE(review): assumes the CSV stores dates as year-month-day -- confirm.
date_parts = df['Date'].str.split('-', expand=True)
df[['year', 'month', 'day']] = date_parts.astype(int)

In [None]:
# Print the first rows, now including the year/month/day columns.
print(df.head())


In [None]:
# 1 for rows whose month is a multiple of 3 (3, 6, 9, 12), 0 otherwise.
month_is_multiple_of_three = df['month'] % 3 == 0
df['is_quarter_end'] = month_is_multiple_of_three.astype(int)

In [None]:
# Print the first rows, now including the is_quarter_end flag.
print(df.head())

In [None]:
# Per-year mean of every column except the 'Date' text column.
without_date = df.drop(columns=['Date'])
data_grouped = without_date.groupby('year').mean()

print(data_grouped)

In [None]:
# Bar chart of each price column in `data_grouped`, on a 2x2 grid.
price_columns = ['Open', 'High', 'Low', 'Close']

plt.figure(figsize=(20, 10))
for panel, column in enumerate(price_columns, start=1):
    plt.subplot(2, 2, panel)
    data_grouped[column].plot.bar()
plt.show()


In [None]:
# Column means for quarter-end rows versus all other rows.
# Note: this rebinds `data_grouped`, replacing the per-year table.
data_grouped = (
    df.drop(columns=['Date'])
    .groupby('is_quarter_end')
    .mean()
)

print(data_grouped)

In [None]:
# Engineered spreads: open minus close, and low minus high.
df['open-close'] = df['Open'].sub(df['Close'])
df['low-high'] = df['Low'].sub(df['High'])

In [None]:
# Print the frame in plain-text form to check the newly added columns.
print(df)

In [None]:
# Label is 1 when the next row's close is higher than this row's close, else 0.
# The last row has no next close (shift(-1) yields NaN there), so the
# comparison is False and that row is labelled 0.
next_close = df['Close'].shift(-1)
df['target'] = (next_close > df['Close']).astype(int)

In [None]:
# Class balance of the target.
# value_counts() orders by frequency, not by class value, so the counts are
# sorted by class and the labels are taken from the index. Hard-coding
# labels=[0, 1] would swap the labels whenever class 1 is the more frequent one.
target_counts = df['target'].value_counts().sort_index()
plt.pie(target_counts.values, labels=target_counts.index.tolist(), autopct='%1.1f%%')
plt.show()

In [None]:
# Pairwise correlation of every column except the 'Date' text column.
without_date = df.drop(columns=['Date'])
corr_matrix = without_date.corr()

In [None]:
# Heat map of a boolean mask: True where a pair's correlation exceeds 0.9.
above_threshold = corr_matrix > 0.9

plt.figure(figsize=(10, 10))
sb.heatmap(above_threshold, annot=True, cbar=False)
plt.show()

In [None]:
# Model inputs (two engineered spreads plus the quarter-end flag) and labels.
# This cell appeared twice in a row with identical content; one copy is kept.
# Note: `features` is rebound here -- earlier it held the list of raw column
# names used by the histogram and box-plot cells.
features = df[['open-close', 'low-high', 'is_quarter_end']]
target = df['target']

In [None]:
scaler = StandardScaler()
features = scaler.fit_transform(features)

In [None]:
X_train, X_valid, Y_train, Y_valid = train_test_split(features, target, test_size=0.1, random_state=2022)
print(X_train.shape, X_valid.shape)

In [None]:
# Candidate classifiers. SVC(probability=True) calibrates its probabilities
# with an internal, randomly shuffled cross-validation, so it is seeded to
# keep the reported scores reproducible. The seed matches the one used for
# the train/validation split; XGBClassifier is seeded for consistency.
models = [
    LogisticRegression(),
    SVC(kernel='poly', probability=True, random_state=2022),
    XGBClassifier(random_state=2022),
]

In [None]:
# Fit each model and report the ROC AUC of its positive-class probabilities
# on both splits. The labels previously said "Accuracy", but the metric
# computed is roc_auc_score.
for model in models:
    model.fit(X_train, Y_train)
    train_scores = model.predict_proba(X_train)[:, 1]
    valid_scores = model.predict_proba(X_valid)[:, 1]
    print(f'{model}:')
    print('Training ROC AUC:', metrics.roc_auc_score(Y_train, train_scores))
    print('Validation ROC AUC:', metrics.roc_auc_score(Y_valid, valid_scores))
    print()


In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import itertools

# Function to plot confusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    Parameters
    ----------
    cm : numpy.ndarray
        Two-dimensional matrix to display. Rows are drawn along the
        'True label' axis and columns along the 'Predicted label' axis.
    classes : sequence
        Tick labels, used for both axes.
    normalize : bool, default False
        When True, each row is divided by its own sum before printing and
        plotting. Rows that sum to zero are left as zeros.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colour map passed to ``plt.imshow``.

    Notes
    -----
    A new figure (figsize 8x6) is opened on every call. The figure is not
    shown here; call ``plt.show()`` afterwards.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A row with no samples sums to zero; dividing by 1 there keeps the
        # row at zero instead of producing NaN (which would also break the
        # text-colour threshold below).
        safe_totals = np.where(row_totals == 0, 1, row_totals)
        cm = cm.astype('float') / safe_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; normalized or otherwise non-integer
    # matrices use two decimals ('d' would raise on float values).
    show_as_int = (not normalize) and np.issubdtype(cm.dtype, np.integer)
    fmt = 'd' if show_as_int else '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

# Confusion matrix of the first model in `models` on the validation split.
cnf_matrix = confusion_matrix(Y_valid, models[0].predict(X_valid))

# plot_confusion_matrix opens its own figure, so no plt.figure() call is made
# here; calling it first would leave an extra, empty figure in the output.

# Plot non-normalized confusion matrix
plot_confusion_matrix(cnf_matrix, classes=[0, 1],
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plot_confusion_matrix(cnf_matrix, classes=[0, 1], normalize=True,
                      title='Normalized confusion matrix')

plt.show()
