## Importing the clean dataset

In [6]:
import pandas as pd
import numpy as np

df_fe = pd.read_csv(r"C:\Users\Acer\Internship\Task4\Cleaned data\stock_prices_cleaned.csv")

# Recreate 'return' if not saved in the CSV
df_fe['return'] = df_fe.groupby('Company')['Close'].pct_change()

# Then proceed with modeling:
df_fe['target'] = (df_fe.groupby('Company')['return'].shift(-1) > 0).astype(int)
df_fe.dropna(subset=['target'], inplace=True)

## modeling for Regression 

In [7]:
import statsmodels.api as sm
from sklearn.model_selection import train_test_split

# Binary target: Did the stock go up tomorrow?
df_fe['target'] = (df_fe.groupby('Company')['return'].shift(-1) > 0).astype(int)
df_fe.dropna(subset=['target'], inplace=True)

X = df_fe[['Open High', 'Low Close', 'Open Low']]  # sample features
X = sm.add_constant(X)
y = df_fe['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

model = sm.Logit(y_train, X_train)
result = model.fit()
print(result.summary())


Optimization terminated successfully.
         Current function value: 0.692498
         Iterations 3
                           Logit Regression Results                           
Dep. Variable:                 target   No. Observations:               479428
Model:                          Logit   Df Residuals:                   479424
Method:                           MLE   Df Model:                            3
Date:                Fri, 08 Aug 2025   Pseudo R-squ.:               1.245e-06
Time:                        14:23:11   Log-Likelihood:            -3.3200e+05
converged:                       True   LL-Null:                   -3.3200e+05
Covariance Type:            nonrobust   LLR p-value:                    0.8431
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0719      0.003     24.757      0.000       0.066       0.078
Open High    -9.1e-08   2.36e

### Interpretation

In [8]:
# Odds ratios
np.exp(result.params)

const        1.074498
Open High    1.000000
Low Close    1.000000
Open Low     1.000000
dtype: float64

In [9]:
# Confidence intervals
result.conf_int()

Unnamed: 0,0,1
const,0.06616544,0.0775425
Open High,-5.538463e-07,3.71847e-07
Low Close,-2.748157e-07,4.240741e-07
Open Low,-5.791058e-07,6.202203e-07
