# Morningstar Pro - Entraînement avancé sur Colab

## Système complet de trading algorithmique avec données sociales

Ce notebook permet :
- De télécharger les données de marché (OHLCV) depuis un exchange crypto
- D'ajouter des indicateurs techniques avancés
- D'intégrer des données sociales (GitHub et Reddit)
- D'entraîner un modèle de deep learning pour le trading

## 1. Installation des dépendances

In [None]:
# Install the Python dependencies used by this notebook (pip quiet mode).
# tensorflow, pandas, numpy and ccxt are pinned to exact versions; the other
# packages (ta, pyarrow, scikit-learn, asyncpraw, tweepy, aiohttp) are not
# pinned, so the versions pip resolves for them may differ between runs.
!pip install -q tensorflow==2.12.0 pandas==1.5.3 numpy==1.23.5 ccxt==4.1.91 ta pyarrow scikit-learn asyncpraw tweepy aiohttp

In [None]:
# Clonage du dépôt et ajout du chemin Morningstar
!git clone https://github.com/Cabrel10/eva001.git
import sys
sys.path.insert(0, '/content/eva001')

In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Morningstar Pro - Entraînement avancé sur Colab\n",
    "\n",
    "Ce notebook permet de choisir dynamiquement les paires et l’intervalle de dates pour télécharger les données, puis d’entraîner le modèle Morningstar sur Colab."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Installation des dépendances système et Python\n",
    "!pip install -q tensorflow==2.12.0 pandas==1.5.3 numpy==1.23.5 ccxt==4.1.91 ta pyarrow scikit-learn asyncpraw tweepy aiohttp PyGithub praw"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Clonage du dépôt et ajout du chemin Morningstar\n",
    "!git clone https://github.com/Cabrel10/eva001.git\n",
    "import sys\n",
    "sys.path.insert(0, '/content/eva001')"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Sélection interactive des paires et des dates\n",
    "import datetime\n",
    "default_pairs = 'BTC/USDT,ETH/USDT,BNB/USDT,SOL/USDT'\n",
    "pairs = input(f\"Entrez les paires séparées par une virgule (exemple: {default_pairs}): \") or default_pairs\n",
    "pairs = [p.strip() for p in pairs.split(',')]\n",
    "start_date = input(\"Date de début (YYYY-MM-DD, défaut 2023-01-01): \") or '2023-01-01'\n",
    "end_date = input(\"Date de fin (YYYY-MM-DD, défaut aujourd'hui): \") or str(datetime.date.today())"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Configuration des APIs sociales\n",
    "from github import Github\n",
    "import praw\n",
    "\n",
    "# Config GitHub (remplacer par ton token)\n",
    "github_token = input(\"Entrez votre token GitHub (ou laissez vide pour désactiver): \") or None\n",
    "gh = Github(github_token) if github_token else None\n",
    "\n",
    "# Config Reddit (remplacer par tes credentials)\n",
    "reddit = praw.Reddit(\n",
    "    client_id=input(\"Reddit client_id: \") or None,\n",
    "    client_secret=input(\"Reddit client_secret: \") or None,\n",
    "    user_agent=\"Morningstar Data Collector\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Fonctions pour données sociales\n",
    "def get_github_stats(repo_name):\n",
    "    if not gh:\n",
    "        return None, None, None, None, None\n",
    "    try:\n",
    "        repo = gh.get_repo(repo_name)\n",
    "        return (\n",
    "            repo.get_commits().totalCount,\n",
    "            repo.stargazers_count,\n",
    "            repo.forks_count,\n",
    "            repo.get_issues(state='open').totalCount,\n",
    "            repo.get_issues(state='closed').totalCount\n",
    "        )\n",
    "    except:\n",
    "        return None, None, None, None, None\n",
    "\n",
    "def get_reddit_sentiment(subreddit, pair):\n",
    "    if not reddit:\n",
    "        return None\n",
    "    try:\n",
    "        submissions = reddit.subreddit(subreddit).search(f\"{pair} flair:Discussion\", limit=10)\n",
    "        return sum(1 for s in submissions if s.score > 0) / 10  # Ratio de posts positifs\n",
    "    except:\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Pipeline de données avancé\n",
    "from Morningstar.utils.data_manager import ExchangeDataManager\n",
    "from Morningstar.utils.custom_indicators import add_technical_indicators\n",
    "import pandas as pd\n",
    "import asyncio\n",
    "\n",
    "# Correction pour l'event loop Colab/Jupyter\n",
    "!pip install nest_asyncio\n",
    "import nest_asyncio\n",
    "nest_asyncio.apply()\n",
    "\n",
    "async def fetch_data(pairs, timeframe='1d', start_date=None, end_date=None):\n",
    "    exchange = ExchangeDataManager(exchange_name=\"kucoin\")\n",
    "    await exchange.load_markets_async()\n",
    "    all_data = []\n",
    "    for pair in pairs:\n",
    "        print(f\"Téléchargement {pair}...\")\n",
    "        df = await exchange.load_data(pair, timeframe, start_date, end_date)\n",
    "        if not df.empty:\n",
    "            df['pair'] = pair\n",
    "            all_data.append(df)\n",
    "    await exchange.close()\n",
    "    if all_data:\n",
    "        return pd.concat(all_data)\n",
    "    else:\n",
    "        raise ValueError(\"Aucune donnée téléchargée.\")\n",
    "\n",
    "raw_data = asyncio.get_event_loop().run_until_complete(fetch_data(pairs, '1h', start_date, end_date))"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Prétraitement et sauvegarde\n",
    "def prepare_dataset(df):\n",
    "    # Réinitialiser l'index pour éviter les doublons\n",
    "    df = df.reset_index(drop=True)\n",
    "    \n",
    "    # Ajouter les indicateurs techniques\n",
    "    df = add_technical_indicators(df)\n",
    "    \n",
    "    # Récupérer les données sociales pour chaque paire\n",
    "    for pair in df['pair'].unique():\n",
    "        # Exemple: mapper BTC/USDT à un repo GitHub et subreddit\n",
    "        repo_map = {\n",
    "            'BTC/USDT': 'bitcoin/bitcoin',\n",
    "            'ETH/USDT': 'ethereum/go-ethereum',\n",
    "            'BNB/USDT': 'binance-chain/docs',\n",
    "            'SOL/USDT': 'solana-labs/solana'\n",
    "        }\n",
    "        \n",
    "        subreddit_map = {\n",
    "            'BTC/USDT': 'Bitcoin',\n",
    "            'ETH/USDT': 'ethereum',\n",
    "            'BNB/USDT': 'binance',\n",
    "            'SOL/USDT': 'solana'\n",
    "        }\n",
    "        \n",
    "        if pair in repo_map:\n",
    "            commits, stars, forks, issues_opened, issues_closed = get_github_stats(repo_map[pair])\n",
    "            mask = df['pair'] == pair\n",
    "            df.loc[mask, 'commits'] = commits\n",
    "            df.loc[mask, 'stars'] = stars\n",
    "            df.loc[mask, 'forks'] = forks\n",
    "            df.loc[mask, 'issues_opened'] = issues_opened\n",
    "            df.loc[mask, 'issues_closed'] = issues_closed\n",
    "            \n",
    "        if pair in subreddit_map:\n",
    "            sentiment = get_reddit_sentiment(subreddit_map[pair], pair.split('/')[0])\n",
    "            df.loc[df['pair'] == pair, 'reddit_sentiment'] = sentiment\n",
    "    \n",
    "    # Colonnes finales\n",
    "    columns = [\n",
    "        'open', 'high', 'low', 'close', 'volume', 'rsi', 'macd', 'macd_signal', 'macd_hist',\n",
    "        'bb_upper', 'bb_middle', 'bb_lower', 'volume_ma', 'volume_anomaly', 'pair',\n",
    "        'commits', 'stars', 'forks', 'issues_opened', 'issues_closed', 'reddit_sentiment', 'datetime'\n",
    "    ]\n",
    "    \n",
    "    # S'assurer que toutes les colonnes existent\n",
    "    for col in columns:\n",
    "        if col not in df.columns:\n",
    "            df[col] = None\n",
    "            \n",
    "    return df[columns]\n",
    "\n",
    "data = prepare_dataset(raw_data)\n",
    "data.to_parquet('full_dataset.parquet')\n",
    "print(f\"Dataset final: {data.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Entraînement du modèle Morningstar\n",
    "import tensorflow as tf\n",
    "from Morningstar.workflows.training_workflow import TrainingWorkflow\n",
    "class ColabConfig:\n",
    "    def __init__(self):\n",
    "        self.time_window = 50\n",
    "        self.features = data.columns.tolist()\n",
    "        self.epochs = 200\n",
    "        self.batch_size = 1024\n",
    "        self.dataset_path = 'full_dataset.parquet'\n",
    "colab_config = ColabConfig()\n",
    "workflow = TrainingWorkflow(colab_config)\n",
    "tf_dataset = workflow._prepare_dataset(data)\n",
    "dataset_size = tf.data.experimental.cardinality(tf_dataset).numpy()\n",
    "val_size = int(dataset_size * 0.2)\n",
    "train_dataset = tf_dataset.skip(val_size)\n",
    "val_dataset = tf_dataset.take(val_size)\n",
    "with tf.distribute.MirroredStrategy().scope():\n",
    "    inputs = tf.keras.Input(shape=(50, len(data.columns)))\n",
    "    x = tf.keras.layers.Conv1D(128, 5, activation='swish')(inputs)\n",
    "    x = tf.keras.layers.BatchNormalization()(x)\n",
    "    x = tf.keras.layers.LSTM(256, return_sequences=True)(x)\n",
    "    x = tf.keras.layers.LSTM(128)(x)\n",
    "    x = tf.keras.layers.Dense(64, activation='swish')(x)\n",
    "    outputs = tf.keras.layers.Dense(1)(x)\n",
    "    model = tf.keras.Model(inputs, outputs)\n",
    "    model.compile(\n",
    "        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),\n",
    "        loss='huber',\n",
    "        metrics=['mae']\n",
    "    )\n",
    "callbacks = [\n",
    "    tf.keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True),\n",
    "    tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5),\n",
    "    tf.keras.callbacks.TensorBoard(log_dir='./logs')\n",
    "]\n",
    "history = model.fit(\n",
    "    train_dataset,\n",
    "    validation_data=val_dataset,\n",
    "    epochs=colab_config.epochs,\n",
    "    batch_size=colab_config.batch_size,\n",
    "    callbacks=callbacks\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Sauvegarde finale et export sur Google Drive\n",
    "model.save('morningstar_pro.h5')\n",
    "from google.colab import drive\n",
    "drive.mount('/content/drive')\n",
    "!cp morningstar_pro.h5 '/content/drive/MyDrive/Colab Data/'"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


In [None]:
# Pipeline de données avancé
from Morningstar.utils.data_manager import ExchangeDataManager
from Morningstar.utils.custom_indicators import add_technical_indicators
import pandas as pd
import asyncio

async def fetch_data(pairs, timeframe='1d', start_date=None, end_date=None):
    """Download data for several pairs from Binance and stack it in one frame.

    For each pair, ``ExchangeDataManager.load_data`` is awaited; every
    non-empty result is tagged with a ``pair`` column and the results are
    concatenated with ``pd.concat``.

    Args:
        pairs: Iterable of pair symbols, passed one by one to ``load_data``.
        timeframe: Timeframe forwarded to ``load_data`` (default ``'1d'``).
        start_date: Forwarded unchanged to ``load_data``.
        end_date: Forwarded unchanged to ``load_data``.

    Returns:
        The concatenation of all non-empty per-pair frames.

    Raises:
        ValueError: If no pair produced any data.
    """
    exchange = ExchangeDataManager(exchange_name="binance")
    frames = []
    try:
        await exchange.load_markets_async()
        for pair in pairs:
            print(f"Téléchargement {pair}...")
            df = await exchange.load_data(pair, timeframe, start_date, end_date)
            if not df.empty:
                df['pair'] = pair
                frames.append(df)
    finally:
        # Always close the exchange manager, even when a download raises;
        # previously an exception skipped the close() call entirely.
        await exchange.close()
    if not frames:
        raise ValueError("Aucune donnée téléchargée.")
    return pd.concat(frames)

import concurrent.futures


def _run_coroutine_blocking(coro):
    """Run ``coro`` to completion and return its result.

    ``asyncio.run`` raises ``RuntimeError`` when an event loop is already
    running in the calling thread, which is the situation inside a
    Jupyter/Colab kernel. When no loop is running, ``asyncio.run`` is used
    directly; otherwise the coroutine is run with ``asyncio.run`` in a one-off
    worker thread and this call blocks until it finishes.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: the plain call is safe.
        return asyncio.run(coro)
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


# NOTE(review): `pairs`, `start_date` and `end_date` must already be defined in
# the kernel; confirm an earlier executable cell sets them.
raw_data = _run_coroutine_blocking(fetch_data(pairs, '1h', start_date, end_date))

In [None]:
# Prétraitement et sauvegarde
def prepare_dataset(df):
    """Add technical indicators and project the frame onto a fixed column set.

    ``add_technical_indicators`` is applied first. Any expected column that is
    still absent afterwards is created and filled with ``None``. The returned
    frame holds exactly the expected columns, in the order listed below.
    """
    expected = (
        ['open', 'high', 'low', 'close', 'volume']
        + ['rsi', 'macd', 'macd_signal', 'macd_hist']
        + ['bb_upper', 'bb_middle', 'bb_lower', 'volume_ma', 'volume_anomaly', 'pair']
        + ['commits', 'stars', 'forks', 'issues_opened', 'issues_closed', 'datetime']
    )
    enriched = add_technical_indicators(df)
    # Create placeholder columns for whatever the indicator step did not provide.
    absent = [name for name in expected if name not in enriched.columns]
    for name in absent:
        enriched[name] = None
    return enriched[expected]

# Build the final dataset from the downloaded frame, write it to a Parquet file
# in the current working directory, then print its shape as a sanity check.
data = prepare_dataset(raw_data)
data.to_parquet('full_dataset.parquet')
print(f"Dataset final: {data.shape}")

In [None]:
# Entraînement du modèle Morningstar
import tensorflow as tf
from Morningstar.workflows.training_workflow import TrainingWorkflow
class ColabConfig:
    """Training settings object passed to ``TrainingWorkflow`` in this notebook.

    The feature list is read from the module-level ``data`` frame when the
    object is created, so ``data`` must exist before ``ColabConfig()`` is called.
    """

    def __init__(self):
        settings = {
            'time_window': 50,
            # Every column of the prepared dataset is listed as a feature.
            'features': data.columns.tolist(),
            'epochs': 200,
            'batch_size': 1024,
            'dataset_path': 'full_dataset.parquet',
        }
        for attr_name, attr_value in settings.items():
            setattr(self, attr_name, attr_value)
# Build the training configuration and let the project workflow turn the
# prepared frame into a tf.data.Dataset.
colab_config = ColabConfig()
workflow = TrainingWorkflow(colab_config)
tf_dataset = workflow._prepare_dataset(data)

# Hold out the first 20% of the dataset elements for validation; the remaining
# elements are used for training.
dataset_size = int(tf.data.experimental.cardinality(tf_dataset).numpy())
val_size = int(dataset_size * 0.2)
if val_size < 1:
    # cardinality() reports negative sentinel values for infinite/unknown
    # sizes, and tiny datasets round down to 0. Either way the validation split
    # would be empty, while the checkpoint and LR callbacks below rely on the
    # default `val_loss` metric. Fail early with an explicit message instead.
    raise ValueError(
        f"Jeu de validation vide (cardinalité du dataset: {dataset_size})."
    )
train_dataset = tf_dataset.skip(val_size)
val_dataset = tf_dataset.take(val_size)

# NOTE(review): the input width counts every column of `data`, including the
# 'pair' and 'datetime' columns produced by prepare_dataset. Confirm that
# workflow._prepare_dataset encodes or drops non-numeric columns consistently.
with tf.distribute.MirroredStrategy().scope():
    # The window length comes from the config instead of a duplicated literal,
    # so the model input always matches the workflow's time window.
    inputs = tf.keras.Input(
        shape=(colab_config.time_window, len(colab_config.features))
    )
    x = tf.keras.layers.Conv1D(128, 5, activation='swish')(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.LSTM(256, return_sequences=True)(x)
    x = tf.keras.layers.LSTM(128)(x)
    x = tf.keras.layers.Dense(64, activation='swish')(x)
    outputs = tf.keras.layers.Dense(1)(x)
    model = tf.keras.Model(inputs, outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='huber',
        metrics=['mae']
    )

callbacks = [
    tf.keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5),
    tf.keras.callbacks.TensorBoard(log_dir='./logs')
]

# `batch_size` is deliberately not passed: the Keras documentation says not to
# specify it when the input is a tf.data.Dataset, because the dataset itself
# yields the batches.
# NOTE(review): presumably workflow._prepare_dataset batches using
# colab_config.batch_size — TODO confirm.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=colab_config.epochs,
    callbacks=callbacks
)

In [None]:
# Save the trained model locally, then copy it to Google Drive.
import os
import shutil

MODEL_FILE = 'morningstar_pro.h5'
DRIVE_EXPORT_DIR = '/content/drive/MyDrive/Colab Data/'

# Save first so the local copy exists even if mounting Drive fails.
model.save(MODEL_FILE)

from google.colab import drive
drive.mount('/content/drive')

# The previous `!cp` failed when the destination folder did not exist, and a
# failing shell escape does not stop the notebook, so the export could silently
# not happen. Create the folder and copy from Python so errors are raised.
os.makedirs(DRIVE_EXPORT_DIR, exist_ok=True)
shutil.copy(MODEL_FILE, DRIVE_EXPORT_DIR)