<a href="https://colab.research.google.com/github/MaheshUmale/GoogleCOLABFiles/blob/main/TradingIMAGETraining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

# Attach Google Drive under /content/drive; the project paths used later live there.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# --- Define Your Permanent Paths ---
# This makes your code clean and easy to manage
DRIVE_PROJECT_PATH = '/content/drive/MyDrive/Colab_Trading_AI/'
DATA_PATH = DRIVE_PROJECT_PATH + 'data/'
MODEL_SAVE_PATH = DRIVE_PROJECT_PATH # Save models to the root project folder

# --- Copy Data from Drive to Fast Session Storage (Run Once per Session) ---
print("Copying zipped images from Drive to local session storage for speed...")
# The exclamation mark lets you run a shell command
!cp '{DATA_PATH}trade_images_clean.zip' /content/

print("Unzipping images...")
!unzip -q /content/trade_images_clean.zip -d /content/
print("Unzipping complete. Images are now in /content/trade_images_clean/")

Copying zipped images from Drive to local session storage for speed...
Unzipping images...
Unzipping complete. Images are now in /content/trade_images_clean/


In [None]:
# Copy the dataset CSV from the Drive data folder into local session storage.
# NOTE(review): the training cell further down loads this CSV straight from
# DATA_PATH on Drive, so nothing visible in this notebook reads the local copy —
# confirm whether this step is still needed.
!cp '{DATA_PATH}hybrid_model_dataset.csv' /content/

In [None]:
# No major installs needed, TensorFlow is pre-installed on Colab
import tensorflow as tf
from tensorflow.keras import layers, models, applications
import pandas as pd
import numpy as np
import os
import pickle
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Log which TensorFlow release this session is running.
print("TensorFlow Version:", tf.version.VERSION)

TensorFlow Version: 2.19.0


In [None]:
################################################################################
#
#       COMPLETE SCRIPT FOR TRAINING A HYBRID TRADING AI MODEL
#       Designed for Google Colab to prevent RAM crashes.
#
################################################################################

# === SECTION 1: SETUP AND CONFIGURATION ===

# --- 1.1 Mount Google Drive (holds the dataset and receives the saved artifacts) ---
from google.colab import drive
drive.mount(mountpoint='/content/drive')

# --- 1.2 Import all necessary libraries ---
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
import numpy as np
import os
import pickle
import gc # Garbage Collector for memory management
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Log which TensorFlow release this session is running.
print("TensorFlow Version:", tf.version.VERSION)

# --- 1.3 Define all paths and training parameters ---
# IMPORTANT: Make sure this path matches your folder structure in Google Drive
DRIVE_PROJECT_PATH = '/content/drive/MyDrive/Colab_Trading_AI/'
DATA_PATH = DRIVE_PROJECT_PATH + 'data/'
MODEL_SAVE_PATH = DRIVE_PROJECT_PATH

# Model and Data Parameters (Optimized from our discussion)
MAX_SEQUENCE_LENGTH = 45  # To fit your longest description
VOCAB_SIZE = 100          # To fit your ~72 unique words with a buffer
IMG_SIZE = 224            # Required input size for MobileNetV2
BATCH_SIZE = 32           # Number of samples per training step (can be 16 to save more RAM)
EPOCHS = 15               # Number of times to train on the entire dataset

# Reproducibility: the train/validation split further down already pins
# random_state=42, but weight initialisation, dropout and dataset shuffling were
# unseeded, so two runs of this notebook could not be compared. Seed Python,
# NumPy and TensorFlow in one call with the same value.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# === SECTION 2: DATA PREPARATION ===

# --- 2.1 Copy data from slow Drive to fast Colab session storage (runs once) ---
print("Copying zipped images from Drive to local session storage for speed...")
!cp '{DATA_PATH}trade_images_clean.zip' /content/

print("Unzipping images... (This may take a minute)")
!unzip -q /content/trade_images_clean.zip -d /content/
print("Unzipping complete. Images are now in /content/trade_images_clean/")

# --- 2.2 Load the CSV from Google Drive ---
print("Loading CSV from Google Drive...")
df = pd.read_csv(DATA_PATH + 'hybrid_model_dataset.csv')

# --- 2.3 IMPORTANT: Adjust Image Paths ---
# The paths in the CSV are relative. We prepend the local Colab path where we unzipped them.
df['image_path'] = '/content/trade_images_clean/' + df['image_path'].astype(str)
print(f"Loaded {len(df)} records. Sample image path: {df['image_path'].iloc[0]}")


# === SECTION 3: TEXT PREPROCESSING ===

print("Processing text data...")
# --- 3.1 Learn the word index from every description ---
# num_words caps the vocabulary at VOCAB_SIZE; "<unk>" is the out-of-vocabulary token.
tokenizer = Tokenizer(oov_token="<unk>", num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(df['text_description'])

# --- 3.2 Encode each description as integer ids, then force a fixed length ---
text_sequences = tokenizer.texts_to_sequences(df['text_description'])
padded_text = pad_sequences(
    text_sequences,
    maxlen=MAX_SEQUENCE_LENGTH,
    truncating='post',  # longer sequences lose their tail
    padding='post',     # shorter sequences are padded at the end
)

# --- 3.3 Pull out the plain arrays the train/validation split works on ---
image_paths = df['image_path'].values
labels = df['label'].values


# === SECTION 4: EFFICIENT DATA PIPELINE (THE RAM CRASH SOLUTION) ===

print("Splitting data and creating efficient tf.data pipelines...")
# --- 4.1 Hold out 20% of the rows for validation ---
# Paths, padded text and labels go through one call so they stay row-aligned;
# stratify=labels asks for the split to be stratified on the label column.
(
    X_train_paths, X_val_paths,
    X_train_text, X_val_text,
    y_train, y_val,
) = train_test_split(
    image_paths,
    padded_text,
    labels,
    stratify=labels,
    random_state=42,
    test_size=0.2,
)

# --- 4.2 Define the function that loads and preprocesses one image ---
# This function is the core of the "conveyor belt" that prevents RAM overload.
def load_and_preprocess_image(path):
    """Read one PNG file and turn it into a MobileNetV2-ready tensor.

    Args:
        path: Location of the PNG file, in a form ``tf.io.read_file`` accepts.

    Returns:
        The image decoded to 3 channels, resized to IMG_SIZE x IMG_SIZE and
        passed through MobileNetV2's ``preprocess_input``.
    """
    png_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_png(png_bytes, channels=3)
    resized = tf.image.resize(pixels, size=[IMG_SIZE, IMG_SIZE])
    # Scaling specific to the MobileNetV2 backbone used by the vision branch.
    return tf.keras.applications.mobilenet_v2.preprocess_input(resized)

# --- 4.3 Define the function that creates the complete dataset generator ---
def create_dataset(image_paths, text_data, labels, shuffle=True):
    """Build a batched ``tf.data`` pipeline yielding ``((image, text), label)``.

    Images are read and decoded lazily, one element at a time, by
    ``load_and_preprocess_image``; only file paths, token ids and labels are
    held as in-memory arrays.

    Args:
        image_paths: Array of image file paths, one per sample.
        text_data: NumPy array of token ids, one row per sample; cast to int32.
        labels: Array of labels, row-aligned with the other two arguments.
        shuffle: When True (the default, matching the previous behaviour of
            always shuffling), sample order is reshuffled on every pass.
            Pass False for a stable order, e.g. for evaluation.

    Returns:
        A ``tf.data.Dataset`` batched by BATCH_SIZE and prefetched.
    """
    # Start from the lightweight records (path string, token ids, label).
    dataset = tf.data.Dataset.from_tensor_slices(
        (image_paths, text_data.astype(np.int32), labels)
    )

    if shuffle:
        # Shuffle BEFORE decoding. The earlier version shuffled decoded images,
        # so its 1024-element buffer held up to 1024 float32 images of
        # IMG_SIZE x IMG_SIZE x 3 in RAM. Buffering paths instead costs almost
        # nothing, which also makes a full-length buffer affordable.
        dataset = dataset.shuffle(buffer_size=max(len(image_paths), 1))

    def _load_example(path, text, label):
        # Swap the path for its decoded image; keep the ((image, text), label) layout.
        return (load_and_preprocess_image(path), text), label

    dataset = dataset.map(_load_example, num_parallel_calls=tf.data.AUTOTUNE)

    # Batch, then prefetch so input preparation overlaps with training steps.
    return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# --- 4.4 Build one pipeline per split from the arrays produced in 4.1 ---
train_ds = create_dataset(image_paths=X_train_paths, text_data=X_train_text, labels=y_train)
val_ds = create_dataset(image_paths=X_val_paths, text_data=X_val_text, labels=y_val)
print("Data pipelines created successfully.")


# === SECTION 5: MEMORY MANAGEMENT ===

print("Cleaning up large variables from RAM before training...")
# Drop the notebook-level names for the raw frame, the encoded text and the
# split arrays; train_ds / val_ds have already been built from them.
del (
    df, image_paths, padded_text, labels, text_sequences,
    X_train_paths, X_val_paths, X_train_text, X_val_text, y_train, y_val,
)
gc.collect()  # Trigger a collection pass right away.
print("Cleanup complete.")


# === SECTION 6: MODEL ARCHITECTURE ===

print("Building the hybrid model...")
# --- 6.1 Vision Branch ("The Eye") using pre-trained MobileNetV2 ---
base_vision_model = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights='imagenet'
)
base_vision_model.trainable = False # Freeze the pre-trained weights

image_input = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3), name='image_input')
# training=False: the frozen backbone is always called in inference mode.
x = base_vision_model(image_input, training=False)
x = layers.GlobalAveragePooling2D()(x)
vision_output = layers.Dense(128, activation='relu', name='vision_output')(x)

# --- 6.2 Text Branch ("The Ear") ---
text_input = layers.Input(shape=(MAX_SEQUENCE_LENGTH,), name='text_input')
# NOTE(review): the sequences are post-padded upstream and this Embedding does
# not mask, so the LSTM also steps over the padding positions. Consider
# mask_zero=True — confirm first, since it changes the trained model.
y = layers.Embedding(input_dim=VOCAB_SIZE, output_dim=64)(text_input)
y = layers.LSTM(64)(y)
text_output = layers.Dense(64, activation='relu', name='text_output')(y)

# --- 6.3 Fusion and Classifier ("The Brain") ---
combined = layers.Concatenate()([vision_output, text_output])
z = layers.Dense(64, activation='relu')(combined)
z = layers.Dropout(0.5)(z)
z = layers.Dense(32, activation='relu')(z)
# A single sigmoid unit, paired with binary cross-entropy below.
final_output = layers.Dense(1, activation='sigmoid', name='final_output')(z)

# --- 6.4 Create and Compile the final Hybrid Model ---
hybrid_model = models.Model(inputs=[image_input, text_input], outputs=final_output)
hybrid_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        # Accuracy alone is not informative here: the recorded run reports the
        # same val_accuracy (0.8676) on every epoch, which suggests the model
        # predicts a single class. AUC is threshold-independent and makes that
        # kind of collapse visible in the training log.
        tf.keras.metrics.AUC(name='auc'),
    ]
)
hybrid_model.summary()


# === SECTION 7: TRAINING THE MODEL ===

print("\nStarting model training...")
# Run EPOCHS passes over train_ds, evaluating on val_ds after each pass; the
# returned History object is kept in `history`.
# NOTE(review): in the run recorded in this notebook's output, val_accuracy is
# 0.8676 on every epoch — check the label balance and consider class weighting.
history = hybrid_model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
)
print("\nTraining complete.")


# === SECTION 8: SAVING THE FINAL ASSETS ===

print("Saving trained model and tokenizer to Google Drive...")
# --- 8.1 Write the trained model next to the project root on Drive ---
model_file = MODEL_SAVE_PATH + 'trading_hybrid_model_v1.h5'
hybrid_model.save(model_file)
print(f"Model saved to {MODEL_SAVE_PATH}trading_hybrid_model_v1.h5")

# --- 8.2 Pickle the fitted tokenizer (CRITICAL for live prediction) ---
# The fitted tokenizer carries the word index learned during training.
tokenizer_file = MODEL_SAVE_PATH + 'tokenizer_v1.pickle'
with open(tokenizer_file, mode='wb') as pickle_out:
    pickle.dump(tokenizer, pickle_out, protocol=pickle.HIGHEST_PROTOCOL)
print(f"Tokenizer saved to {MODEL_SAVE_PATH}tokenizer_v1.pickle")

print("\n\n--- ALL STEPS COMPLETED SUCCESSFULLY ---")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
TensorFlow Version: 2.19.0
Copying zipped images from Drive to local session storage for speed...
Unzipping images... (This may take a minute)
replace /content/trade_images_clean/NEGATIVE/360ONE/360ONE_20250730_124500.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
Unzipping complete. Images are now in /content/trade_images_clean/
Loading CSV from Google Drive...
Loaded 10532 records. Sample image path: /content/trade_images_clean/POSITIVE/360ONE/360ONE_20250729_120400.png
Processing text data...
Splitting data and creating efficient tf.data pipelines...
Data pipelines created successfully.
Cleaning up large variables from RAM before training...
Cleanup complete.
Building the hybrid model...



Starting model training...
Epoch 1/15
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 111ms/step - accuracy: 0.8681 - loss: 0.4229 - val_accuracy: 0.8676 - val_loss: 0.4294
Epoch 2/15
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 104ms/step - accuracy: 0.8685 - loss: 0.4035 - val_accuracy: 0.8676 - val_loss: 0.3988
Epoch 3/15
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 107ms/step - accuracy: 0.8638 - loss: 0.4080 - val_accuracy: 0.8676 - val_loss: 0.4345
Epoch 4/15
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 102ms/step - accuracy: 0.8679 - loss: 0.3976 - val_accuracy: 0.8676 - val_loss: 0.4095
Epoch 5/15
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 103ms/step - accuracy: 0.8639 - loss: 0.3967 - val_accuracy: 0.8676 - val_loss: 0.3966
Epoch 6/15
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 103ms/step - accuracy: 0.8651 - loss: 0.3964 - val_accuracy: 0.8676 -




Training complete.
Saving trained model and tokenizer to Google Drive...
Model saved to /content/drive/MyDrive/Colab_Trading_AI/trading_hybrid_model_v1.h5
Tokenizer saved to /content/drive/MyDrive/Colab_Trading_AI/tokenizer_v1.pickle


--- ALL STEPS COMPLETED SUCCESSFULLY ---
