# Quant_L&D-1 (TASK 3)

### DATA CLEANING

In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# --- Load the raw LendingClub extract ---------------------------------------
df = pd.read_csv("lc_2016_2017.csv", low_memory=False, dtype={"desc": str, "verification_status_joint": str})

print("Initial dataset shape:", df.shape)
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line after the report).
df.info()

# --- Drop sparse columns ----------------------------------------------------
# Keep only columns with at least 50% non-null values. dropna() documents
# `thresh` as an integer; ceil() keeps the same cut-off as the float comparison.
thresh = int(np.ceil(len(df) * 0.5))
df = df.dropna(thresh=thresh, axis=1).copy()  # .copy(): later column assignments target an independent frame

# --- Build the binary target BEFORE any encoding ----------------------------
# The status strings must be mapped while they are still strings: once the
# column has been label-encoded, replacing 'Charged Off' / 'Fully Paid' etc.
# matches nothing and the target silently stays an arbitrary multi-class code.
target_col = 'loan_status'
status_to_label = {'Charged Off': 1, 'Default': 1, 'Fully Paid': 0, 'Current': 0}

target = df[target_col].map(status_to_label)
unlabelled = target.isna()
if unlabelled.any():
    # Statuses outside the mapping (e.g. late / grace-period loans) have no
    # defined 0/1 label, so those rows are removed rather than kept as strings.
    print(f"Dropping {int(unlabelled.sum())} rows whose loan_status is not one of {sorted(status_to_label)}")
df = df.loc[~unlabelled].copy()
df[target_col] = target.loc[~unlabelled].astype(int)

# --- Column groups (the target is excluded from both) -----------------------
num_cols = [c for c in df.select_dtypes(include=[np.number]).columns if c != target_col]
cat_cols = [c for c in df.select_dtypes(exclude=[np.number]).columns if c != target_col]

# --- Impute -----------------------------------------------------------------
# Numeric features: column median.
df[num_cols] = df[num_cols].fillna(df[num_cols].median())

# Categorical features: most frequent value.
for col in cat_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

# --- Encode categoricals ----------------------------------------------------
# One encoder per column, kept in `encoders`, so every integer code can be
# mapped back to its original label later (a single shared encoder only
# remembers the classes of the last column it was fitted on).
encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# --- Scale numeric features to [0, 1] ---------------------------------------
scaler = MinMaxScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

df.to_csv("cleaned_data.csv", index=False)

print("Data preprocessing complete. Final shape:", df.shape)


Initial dataset shape: (759338, 72)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 759338 entries, 0 to 759337
Data columns (total 72 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   id                           759338 non-null  int64  
 1   member_id                    0 non-null       float64
 2   loan_amnt                    759338 non-null  int64  
 3   funded_amnt                  759338 non-null  int64  
 4   funded_amnt_inv              759338 non-null  float64
 5   term                         759338 non-null  object 
 6   int_rate                     759338 non-null  float64
 7   installment                  759338 non-null  float64
 8   grade                        759338 non-null  object 
 9   sub_grade                    759338 non-null  object 
 10  emp_title                    708368 non-null  object 
 11  emp_length                   708975 non-null  object 
 12  home_ownership        

### TRADE BACKTEST

In [None]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import math
from backtesting import Backtest, Strategy
import yfinance as yf

class GaussianChannelStrategy(Strategy):
    """Long-only channel breakout strategy for backtesting.py.

    Indicators (built once in ``init``):
      * ``gaussian``  - EMA of the typical price (H+L+C)/3 whose length is
        derived from ``sampling_period``.
      * ``atr``       - Average True Range over ``atr_period`` bars.
      * ``stoch_k``   - Stochastic RSI %K (14, 14, 3, 3).
      * ``high_band`` / ``low_band`` - ``gaussian`` +/- ``band_multiplier`` * ATR.

    Rules (evaluated per bar in ``next``):
      * Enter long when flat, the close is above ``high_band``, the filter
        line is rising and %K is above ``stoch_entry_level``.
      * Exit when the close falls below the filter line; each trade also
        carries an ATR-based stop-loss and a take-profit at
        ``risk_reward_ratio`` times the stop distance.

    Note: this class intentionally does NOT override ``Strategy.buy``. Entry
    sizing lives in ``_enter_long`` which delegates to the inherited ``buy``;
    overriding ``buy()`` with a no-argument method and then calling
    ``self.buy(size=..., sl=..., tp=...)`` from inside it raises ``TypeError``.
    """

    # Class-level attributes are strategy parameters; backtesting.py lets
    # callers override them through Backtest.run(**kwargs) / optimize().
    atr_multiplier = 1.5           # stop distance, in ATRs
    atr_period = 14                # ATR look-back
    risk_reward_ratio = 2.0        # take-profit distance / stop distance
    initial_risk_per_trade = 0.1   # fraction of equity lost if the stop is hit
    current_amount = 100000        # kept for backward compatibility; sizing uses live equity
    sampling_period = 144          # period fed into the filter-length formula
    band_multiplier = 1.414        # channel half-width, in ATRs
    stoch_entry_level = 70         # minimum %K required to enter
    warmup_bars = 50               # bars to wait before evaluating signals

    def init(self):
        """Pre-compute all indicator series over the full data set."""
        high = pd.Series(self.data.High)
        low = pd.Series(self.data.Low)
        close = pd.Series(self.data.Close)
        typical_price = (high + low + close) / 3

        def calculate_gaussian():
            # Convert the sampling period into an equivalent EMA length.
            beta = (1 - np.cos(2 * np.pi / self.sampling_period)) / (np.sqrt(2) - 1)
            alpha = -beta + np.sqrt(beta ** 2 + 2 * beta)
            ema_length = int(2 / alpha - 1) if alpha != 0 else 20
            ema_length = max(1, ema_length)  # guard against a degenerate length
            return ta.ema(typical_price, length=ema_length).ffill().values

        def calculate_atr():
            return ta.atr(high, low, close, length=self.atr_period).ffill().values

        def calculate_stoch():
            stoch = ta.stochrsi(close, length=14, rsi_length=14, k=3, d=3)
            return stoch['STOCHRSIk_14_14_3_3'].ffill().values

        self.gaussian = self.I(calculate_gaussian, name='Gaussian')
        self.atr = self.I(calculate_atr, name='ATR')
        self.stoch_k = self.I(calculate_stoch, name='StochRSI %K')

        # Channel bands around the filter line.
        self.high_band = self.I(lambda: self.gaussian + self.atr * self.band_multiplier, name='High band')
        self.low_band = self.I(lambda: self.gaussian - self.atr * self.band_multiplier, name='Low band')

    def next(self):
        """Evaluate exit and entry rules on the most recent bar."""
        if len(self.data.Close) < self.warmup_bars:
            return

        last_close = self.data.Close[-1]

        if self.position:
            # Trend exit: close back below the filter line.
            if last_close < self.gaussian[-1]:
                self.position.close()
            # Already in a trade: do not stack a new entry on every bar while
            # the entry signal persists.
            return

        price_above_band = last_close > self.high_band[-1]
        gaussian_rising = self.gaussian[-1] > self.gaussian[-2]
        stoch_overbought = self.stoch_k[-1] > self.stoch_entry_level

        if price_above_band and gaussian_rising and stoch_overbought:
            self._enter_long()

    def _enter_long(self):
        """Size and submit a long order with ATR-based stop-loss and take-profit.

        The quantity is chosen so that hitting the stop loses roughly
        ``initial_risk_per_trade`` of current equity:
        ``qty = equity * risk / stop_distance``. No order is placed when the
        stop distance is unusable (NaN / non-positive ATR) or the resulting
        quantity is below one unit.
        """
        entry = self.data.Close[-1]
        stop_distance = self.atr[-1] * self.atr_multiplier
        if not np.isfinite(stop_distance) or stop_distance <= 0:
            return

        stop_loss = entry - stop_distance
        if stop_loss <= 0:
            return  # a non-positive stop price is meaningless
        take_profit = entry + stop_distance * self.risk_reward_ratio

        # Cash at risk divided by per-unit loss at the stop. floor() rather
        # than ceil() so rounding never pushes the risk above the budget.
        risk_cash = self.equity * self.initial_risk_per_trade
        qty = math.floor(risk_cash / stop_distance)
        if qty < 1:
            return

        # Inherited Strategy.buy (not overridden in this class).
        self.buy(size=qty, sl=stop_loss, tp=take_profit)

# Download daily OHLCV bars for GOOGL.
data = yf.download('GOOGL', start='2015-01-01', end='2025-01-01')

# Recent yfinance releases return MultiIndex columns (price field, ticker) even
# for a single ticker; Backtest needs flat 'Open'/'High'/'Low'/'Close' columns.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

if data.empty:
    # yfinance reports download failures by returning an empty frame.
    raise RuntimeError("yfinance returned no data for GOOGL; cannot run the backtest")

# margin=0.01 allows up to 100x leverage; commission is 0.2% per trade.
bt = Backtest(data, GaussianChannelStrategy, cash=10000, commission=.002, margin=0.01)

stats = bt.run()
print(stats)  # show the performance summary instead of silently discarding it
bt.plot()