In [None]:
# Run nvidia-smi in a shell to print the GPU/driver status table
# (presumably a sanity check that the runtime has a GPU attached before training).
!nvidia-smi


## 2. Clone Your Repository

In [None]:
!git clone https://github.com/yourusername/llm-finetuning-project.git
%cd llm-finetuning-project


## 3. Install Dependencies


In [None]:
!pip install -r requirements.txt

## 4. Set Up AWS Credentials

In [None]:
import os
from getpass import getpass

# Region exported to the AWS SDKs for this session.
aws_region = "us-east-1"  # Replace with your preferred region

# Secrets are read through a hidden prompt instead of being typed into the
# notebook source, so they never end up in a saved or shared copy of the file.
# A value that is already present in the environment is reused, which keeps the
# cell re-runnable without prompting again.
for credential_var in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
    credential_value = os.environ.get(credential_var) or getpass(f"{credential_var}: ").strip()
    if not credential_value:
        raise ValueError(f"{credential_var} is empty; AWS credentials are required")
    os.environ[credential_var] = credential_value

os.environ["AWS_REGION"] = aws_region
# boto3/botocore take their default region from AWS_DEFAULT_REGION, not AWS_REGION,
# so export both names.
os.environ["AWS_DEFAULT_REGION"] = aws_region

# Only reached when both credential variables hold a non-empty value
print("AWS credentials configured successfully!")


## 5. Test AWS S3 Access


In [None]:
import boto3

bucket_name = "llm-finetuning-rahman-1234"  # Replace with your bucket name
s3 = boto3.client('s3')

# Smoke test: list up to 10 keys under data/processed/ and report the outcome.
# Any failure is reported as text instead of raising, so the cell always completes.
try:
    response = s3.list_objects_v2(
        Bucket=bucket_name,
        Prefix="data/processed/",
        MaxKeys=10,
    )
    if 'Contents' not in response:
        # The response carries no 'Contents' entry when nothing matches the prefix
        print(f"No files found in {bucket_name}/data/processed/")
    else:
        print("Files in S3 bucket:")
        for entry in response['Contents']:
            print(f"- {entry['Key']}")
    print("\nS3 connection successful!")
except Exception as e:
    print(f"Error accessing S3: {str(e)}")


## 6. Create Temporary Directories

In [None]:
!mkdir -p models/qwen-0.5b-finetuned
!mkdir -p mlruns
!mkdir -p checkpoints

## 7. Set Up Python Path


In [None]:
import os
import sys

# Put the current directory on the import path so project packages
# (e.g. `src.training.train`) can be imported from the notebook.
# The membership check keeps re-runs from appending duplicate entries.
if '.' not in sys.path:
    sys.path.append('.')
print(f"Current working directory: {os.getcwd()}")


## 8. Modify Configuration File

In [None]:
import yaml

# Base training config and the Colab-specific copy derived from it
config_path = "configs/training/qwen_training_config.yaml"
colab_config_path = "configs/training/qwen_colab_config.yaml"

# Read the base configuration
with open(config_path, "r") as base_file:
    config = yaml.safe_load(base_file)

# Values overridden in the "training" section for the Colab run
colab_overrides = {
    "batch_size": 2,  # Smaller batch size for Colab
    "gradient_accumulation_steps": 4,
    "fp16": True,
    "num_epochs": 1,  # For testing, use just 1 epoch
}
training_section = config["training"]
for option, value in colab_overrides.items():
    training_section[option] = value

# Write the adjusted configuration to its own file; the base file is left untouched
with open(colab_config_path, "w") as colab_file:
    yaml.dump(config, colab_file)

print("Configuration updated for Colab environment!")


## 9. Run Training Script

In [None]:
# Launch the training script in a shell with the current directory on PYTHONPATH,
# pointing it at the Colab config file configs/training/qwen_colab_config.yaml.
!PYTHONPATH=. python src/training/train.py --config configs/training/qwen_colab_config.yaml


## 10. Verify Trained Model

In [None]:
# Long-format listing (including hidden entries) of the model directory.
# NOTE(review): presumably this is where train.py writes the fine-tuned model — confirm against the training config.
!ls -la models/qwen-0.5b-finetuned


## 11. Upload Results to S3 (Optional)


In [None]:
from src.training.train import upload_to_s3

# Reuse the bucket chosen in the S3 access check so that replacing the bucket
# name there also redirects this upload; fall back to the original default when
# that cell has not been run in this kernel.
upload_bucket = globals().get("bucket_name", "llm-finetuning-rahman-1234")

local_model_path = "models/qwen-0.5b-finetuned"
s3_output_path = f"s3://{upload_bucket}/models/qwen-0.5b-finetuned"

print(f"Uploading model from {local_model_path} to {s3_output_path}...")
# NOTE(review): upload_to_s3 is defined in the project; assumed to take
# (local directory, s3:// URI) as in the original call — confirm its error behaviour.
upload_to_s3(local_model_path, s3_output_path)
print("Upload complete!")