In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# --- Step 1: Load stock data from CSV ---
# The columns used below are 'Name', 'date', 'open', 'high', 'low', 'close'
# and 'volume'.
full_df = pd.read_csv("/content/SCOA_A5.csv")

# Filter for a single stock (e.g., 'AAL')
ticker = 'AAL'
df = full_df[full_df['Name'] == ticker].copy()

if df.empty:
    print(f"Error: Ticker '{ticker}' not found in the CSV file.")
else:
    # Ensure data is sorted by date
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')

    # Create the 'Target' column (1 if next day's close is higher, 0 otherwise)
    # Note: Using lowercase 'close' to match the CSV
    next_close = df['close'].shift(-1)
    df['Target'] = (next_close > df['close']).astype(int)

    # Rows without a next-day close (always the last row) cannot be labelled.
    # The comparison above evaluates to False for them, so after astype(int)
    # they carry a fabricated 0 rather than NaN and a plain dropna() would
    # keep them. Remove them explicitly, then drop rows with any other gaps.
    df = df[next_close.notna()].dropna()

    # --- Step 2: Feature selection ---
    # Using lowercase column names from the CSV
    features_list = ['open', 'high', 'low', 'close', 'volume']
    features = df[features_list]
    y = df['Target'].values

    # --- Step 3: Train-test split, then scaling ---
    # shuffle=False is important to keep the time-series order
    features_train, features_test, y_train, y_test = train_test_split(
        features, y, test_size=0.2, shuffle=False)

    # Fit the scaler on the training window only; fitting on the full data
    # would leak the test period's min/max into training. Test values may
    # therefore fall slightly outside [0, 1].
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(features_train)
    X_test = scaler.transform(features_test)
    # Keep the full scaled matrix available under its original name.
    X = np.concatenate([X_train, X_test])

    # --- Step 4: Build ANN model ---
    # One input per selected feature. An explicit Input layer replaces the
    # deprecated `input_dim` argument on the first Dense layer.
    model = Sequential()
    model.add(Input(shape=(len(features_list),)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Use the same epochs and batch size you had
    model.fit(X_train, y_train, epochs=60, batch_size=42, verbose=1)

    # --- Step 5: Evaluate model ---
    # predict() returns one probability per row with shape (n, 1); flatten it
    # so the predicted labels have the same 1-D shape as y_test.
    y_pred_proba = model.predict(X_test)
    y_pred = (y_pred_proba > 0.5).astype("int32").ravel()

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"\n--- Model Evaluation for {ticker} ---")
    print(f"Accuracy: {accuracy:.2f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("---------------------------------")

Epoch 1/60


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5196 - loss: 0.6958
Epoch 2/60
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5281 - loss: 0.6927 
Epoch 3/60
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5479 - loss: 0.6909 
Epoch 4/60
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5379 - loss: 0.6914 
Epoch 5/60
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5226 - loss: 0.6908
Epoch 6/60
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5310 - loss: 0.6910 
Epoch 7/60
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5346 - loss: 0.6899 
Epoch 8/60
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5131 - loss: 0.6909 
Epoch 9/60
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

Step 1: Import libraries

```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
```

* **numpy**: for numerical operations
* **pandas**: for handling CSV and DataFrames
* **MinMaxScaler**: scales numerical data into [0,1] range
* **train_test_split**: splits dataset into training and testing
* **accuracy_score, confusion_matrix**: to evaluate the model’s performance
* **Sequential, Dense**: build a feed-forward neural network

---

Step 2: Load the stock data

```python
full_df = pd.read_csv("/content/SCOA_A5.csv")
```

Reads your dataset `SCOA_A5.csv` which likely has columns like `Name, date, open, high, low, close, volume`.

---

Step 3: Filter one stock (AAL)

```python
ticker = 'AAL'
df = full_df[full_df['Name'] == ticker].copy()
```

Here we only keep rows for **American Airlines (AAL)**.
If this company name doesn’t exist, it prints an error.

---

Step 4: Sort by date

```python
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date')
```

Ensures that the data is **chronologically ordered**, which is essential for time-series tasks like stock prediction.

---
Step 5: Create the Target variable

```python
df['Target'] = (df['close'].shift(-1) > df['close']).astype(int)
```

Here’s what it does:

* `df['close'].shift(-1)` → next day’s close price
* Compares with current day’s close
* If **next day’s price is higher**, `Target = 1` (buy signal)
* If **lower or unchanged**, `Target = 0` (sell/hold signal)

Then:

```python
df = df.dropna()
```

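This is intended to drop the last row (since it has no next-day price). Note, however, that comparing against the missing next-day value yields `False`, which `.astype(int)` turns into `0` rather than `NaN` — so `dropna()` alone does not remove that row, and it must be dropped explicitly (e.g. `df = df.iloc[:-1]`).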

---

Step 6: Feature selection and normalization

```python
features_list = ['open', 'high', 'low', 'close', 'volume']
features = df[features_list]
```

Selecting the key 5 numeric features.

```python
scaler = MinMaxScaler()
X = scaler.fit_transform(features)
```

All feature values are normalized between 0 and 1 to help the neural network train efficiently.

```python
y = df['Target'].values
```

Target (0 or 1) is extracted as output labels.

---

Step 7: Split into training and testing sets

```python
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False)
```

* 80% data → training
* 20% data → testing
* `shuffle=False` ensures time-order is preserved (since stock data is sequential).

---

Step 8: Build the ANN model

```python
model = Sequential()
model.add(Dense(64, input_dim=len(features_list), activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
```

**Explanation:**

| Layer | Type  | Units | Activation | Purpose                                    |
| ----- | ----- | ----- | ---------- | ------------------------------------------ |
| 1     | Dense | 64    | ReLU       | Extract nonlinear patterns                 |
| 2     | Dense | 32    | ReLU       | Deeper feature learning                    |
| 3     | Dense | 1     | Sigmoid    | Output between 0 and 1 (binary prediction) |

---

Step 9: Compile model

```python
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
```

* **Loss**: Binary Crossentropy (since output is 0/1)
* **Optimizer**: Adam (adaptive learning rate)
* **Metrics**: Accuracy

---

Step 10: Train the model

```python
model.fit(X_train, y_train, epochs=60, batch_size=42, verbose=1)
```

* Trains for **60 epochs** (passes through the dataset 60 times)
* Batch size 42 (samples processed before updating weights)
* Shows progress per epoch

---

Step 11: Evaluate model

```python
y_pred_proba = model.predict(X_test)
y_pred = (y_pred_proba > 0.5).astype("int32")
```

* Predicts probabilities
* Converts them to class labels (0 or 1)

---

Step 12: Compute accuracy and confusion matrix

```python
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
```

* Accuracy = % of correct predictions
* Confusion matrix = table of correct vs incorrect predictions

---

# OUTPUT EXPLANATION

Each line of the training log means:

* **Epoch 1/60**: First iteration of training
* **24/24**: 24 batches processed per epoch (since training samples ÷ batch_size ≈ 24; the last batch may be smaller)
* **Loss = 0.6958**: Error (want it to go ↓)
* **Accuracy = 0.5196**: ~52% accuracy during training

You can observe:

* Loss **decreases slightly** (from 0.695 → ~0.68)
* Accuracy **fluctuates between 50–57%**

This means the network is learning *slightly* but is struggling to generalize — which is common with **stock data** (very noisy).

---


```
--- Model Evaluation for AAL ---
Accuracy: 0.55
Confusion Matrix:
[[83 40]
 [73 56]]
---------------------------------
```

Let’s decode this:

| Metric               | Meaning                                                                                                           |
| -------------------- | ----------------------------------------------------------------------------------------------------------------- |
| **Accuracy: 0.55**   | The model predicts the next-day direction correctly 55% of the time — slightly better than random guessing (50%). |
| **Confusion Matrix** | Counts of correct and incorrect predictions per class (broken down below).                                        |

```
[[83 40]
 [73 56]]
```

| True class     | Predicted as 0 | Predicted as 1 |
| -------------- | -------------- | -------------- |
| 0 (price down) | 83             | 40             |
| 1 (price up)   | 73             | 56             |

So:

* 83 → Correctly predicted price went down
* 56 → Correctly predicted price went up
* 40 + 73 = 113 → total wrong predictions

---

INTERPRETATION

*Good signs:*

* Model is able to learn some trend signals.
* Accuracy > 50% means it’s learning *some* pattern beyond random.

**Issues / Improvements:**

1. Add more features: moving averages, RSI, MACD, etc.
2. Use time-series models (LSTM, GRU) — better for sequence data.
3. Try longer training (more epochs, smaller learning rate).
4. Feature engineering and normalization consistency are key.

---

