In [1]:
"""
Notebook de diagnostic - Environnement SageMaker
Projet : Système de Recommandation MovieLens
Auteur : Gninninmaguignon Silué
Date : Octobre 2025
"""

# ============================================
# PARTIE 1 : VÉRIFICATION DE L'ENVIRONNEMENT
# ============================================

import sys
import boto3
import sagemaker
import torch
import pandas as pd
import numpy as np
from datetime import datetime

# Report banner: rule, title, rule (one line each).
print("=" * 60, "üîç DIAGNOSTIC DE L'ENVIRONNEMENT SAGEMAKER", "=" * 60, sep="\n")

# --- Interpreter version ---
print(f"\n‚úÖ Python version: {sys.version}")

# --- Library versions ---
# Each entry pairs the label shown in the report with the imported module
# whose __version__ attribute is printed.
print("\nüì¶ Biblioth√®ques install√©es :")
for label, module in (
    ("boto3 (AWS SDK)", boto3),
    ("sagemaker", sagemaker),
    ("PyTorch", torch),
    ("pandas", pd),
    ("numpy", np),
):
    print(f"  - {label}: {module.__version__}")

# --- GPU check ---
# Query CUDA availability once and reuse the answer for both the report line
# and the branch below.
gpu_available = torch.cuda.is_available()
print(f"\nüñ•Ô∏è GPU disponible: {gpu_available}")
if not gpu_available:
    print("  ‚ö†Ô∏è Mode CPU (normal pour le d√©veloppement)")
else:
    # Only device 0 is named; the count covers every visible device.
    print(f"  - Nom du GPU: {torch.cuda.get_device_name(0)}")
    print(f"  - Nombre de GPUs: {torch.cuda.device_count()}")

# ============================================
# PART 2: AWS CONFIGURATION
# ============================================

print(f"\n{'=' * 60}")
print("‚òÅÔ∏è CONFIGURATION AWS")
print("=" * 60)

# --- SageMaker session ---
# The session supplies the region; the execution role is looked up through
# the SDK helper.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()

print(f"\n‚úÖ R√©gion AWS: {region}", f"‚úÖ Role IAM: {role}", sep="\n")

# --- Default S3 bucket ---
default_bucket = sagemaker_session.default_bucket()
print(f"\n‚úÖ Bucket S3 par d√©faut: s3://{default_bucket}")

# --- Account information ---
# STS reports the identity the notebook's credentials resolve to.
sts_client = boto3.client('sts')
account_info = sts_client.get_caller_identity()
print(
    f"\n‚úÖ Account ID: {account_info['Account']}",
    f"‚úÖ User ARN: {account_info['Arn']}",
    sep="\n",
)

# ============================================
# PART 3: PROJECT STRUCTURE
# ============================================

import os

print(f"\n{'=' * 60}")
print("üìÅ CR√âATION DE LA STRUCTURE DU PROJET")
print("=" * 60)

# Project tree described as parent directory -> sub-directories. Dict order
# is insertion order, so the flattened list keeps the intended sequence.
project_layout = {
    'data': ('raw', 'processed', 'sample'),
    'models': ('saved_models', 'checkpoints'),
    'outputs': ('metrics', 'plots', 'logs'),
}

# Flat list of relative paths to create; 'src' has no sub-directories.
folders = [
    f"{parent}/{child}"
    for parent, children in project_layout.items()
    for child in children
] + ['src']

# exist_ok=True makes the cell safe to re-run: existing directories are kept.
for folder in folders:
    os.makedirs(folder, exist_ok=True)
    print(f"‚úÖ Cr√©√©: {folder}/")

# ============================================
# PART 4: S3 CONNECTION TEST
# ============================================

import json

print("\n" + "=" * 60)
print("üóÑÔ∏è TEST DE CONNEXION S3")
print("=" * 60)

# Low-level S3 client. Not used by the test below; presumably kept for later
# cells (TODO confirm, otherwise it can be dropped).
s3_client = boto3.client('s3')

# Write a small marker file to upload. The text contains accented characters,
# so the encoding is pinned to UTF-8 instead of depending on the kernel locale.
test_file = 'outputs/logs/test_connection.txt'
with open(test_file, 'w', encoding='utf-8') as f:
    f.write(f"Test de connexion S3 - {datetime.now()}\n")
    f.write("Projet: Syst√®me de Recommandation MovieLens\n")
    f.write(f"R√©gion: {region}\n")

# Upload to S3
s3_prefix = 'recommender-system-project'
# Expected destination; replaced below by the URI that upload_data() reports.
s3_path = f"s3://{default_bucket}/{s3_prefix}/logs/test_connection.txt"

# Outcome of the upload, consulted by the summary so that it never announces
# success after a failed connection test.
s3_upload_ok = False
try:
    uploaded_uri = sagemaker_session.upload_data(
        path=test_file,
        bucket=default_bucket,
        key_prefix=f"{s3_prefix}/logs"
    )
    # Prefer the URI returned by the SDK over the hand-built one.
    s3_path = uploaded_uri or s3_path
    s3_upload_ok = True
    print("‚úÖ Fichier test upload√© avec succ√®s vers S3")
    print(f"   Chemin: {s3_path}")
except Exception as e:
    # Deliberately broad: this is a diagnostic, so any failure (permissions,
    # network, missing bucket, ...) is reported and the cell keeps running.
    print(f"‚ùå Erreur lors de l'upload vers S3: {e}")

# ============================================
# PART 5: SUMMARY AND NEXT STEPS
# ============================================

print("\n" + "=" * 60)
print("üìä R√âSUM√â DU DIAGNOSTIC")
print("=" * 60)

# Only claim readiness when the S3 round-trip actually worked.
if s3_upload_ok:
    print("\n‚úÖ ENVIRONNEMENT PR√äT POUR LE PROJET")
else:
    print("\n‚ùå ENVIRONNEMENT INCOMPLET : le test de connexion S3 n'a pas abouti")

print("\nüìã Informations importantes √† noter :")
print(f"  1. Bucket S3: {default_bucket}")
print(f"  2. R√©gion: {region}")
print(f"  3. Pr√©fixe S3 du projet: {s3_prefix}")
print(f"  4. Role IAM: {role}")

print("\nüöÄ PROCHAINES √âTAPES :")
print("  1. ‚úÖ T√©l√©charger le dataset MovieLens vers S3")
print("  2. ‚è≥ Exploration des donn√©es (EDA)")
print("  3. ‚è≥ Preprocessing et feature engineering")
print("  4. ‚è≥ Entra√Ænement du mod√®le")
print("  5. ‚è≥ D√©ploiement sur SageMaker Endpoint")

# Persist the settings printed above. The values do not depend on the upload
# outcome, so the file is written in both cases.
config = {
    'bucket': default_bucket,
    'region': region,
    'role': role,
    's3_prefix': s3_prefix,
    'date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}

with open('outputs/logs/config.json', 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)

print("\n‚úÖ Configuration sauvegard√©e dans: outputs/logs/config.json")
print("\n" + "=" * 60)
if s3_upload_ok:
    print("‚ú® DIAGNOSTIC TERMIN√â AVEC SUCC√àS !")
else:
    print("‚ùå DIAGNOSTIC INCOMPLET : corrigez l'erreur S3 ci-dessus puis relancez la cellule")
print("=" * 60)

sagemaker.config INFO - Fetched defaults config from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
üîç DIAGNOSTIC DE L'ENVIRONNEMENT SAGEMAKER

‚úÖ Python version: 3.11.11 | packaged by conda-forge | (main, Dec  5 2024, 14:17:24) [GCC 13.3.0]

üì¶ Biblioth√®ques install√©es :
  - boto3 (AWS SDK): 1.39.11
  - sagemaker: 2.245.0
  - PyTorch: 2.6.0
  - pandas: 2.3.1
  - numpy: 1.26.4

üñ•Ô∏è GPU disponible: False
  ‚ö†Ô∏è Mode CPU (normal pour le d√©veloppement)

‚òÅÔ∏è CONFIGURATION AWS
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMak