# Deep Learning for Image Classification:
## * Build and fine-tune convolutional neural networks (CNNs) for image classification tasks using frameworks like TensorFlow or PyTorch.
## * Explore advanced architectures like transfer learning with pre-trained models and conduct hyperparameter tuning.

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
from torch.utils.data import DataLoader

# Training configuration
num_epochs = 10
batch_size = 64
learning_rate = 0.001

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each channel with the mean/std listed below
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# CIFAR-10 train / test splits, stored under ./data (downloaded if missing)
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Only the training batches are shuffled; test order stays fixed
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Load a ResNet50 initialized from pre-trained ImageNet weights.
# The `weights=` enum replaces the deprecated `pretrained=True` flag;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` selected, so the
# starting weights are unchanged.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
num_ftrs = model.fc.in_features

# Replace the last fully connected layer for transfer learning
model.fc = nn.Linear(num_ftrs, 10)  # 10 classes in CIFAR-10 dataset

# Send the model to GPU if available
model = model.to(device)

# Loss function and optimizer (every model parameter is handed to Adam,
# so the whole network is fine-tuned, not just the new head)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Fine-tuning loop: one optimizer step per mini-batch, for num_epochs passes
total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the same device as the model
        images, labels = images.to(device), labels.to(device)

        # Forward pass: class scores, then the loss against the labels
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass: clear stale gradients, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report progress only on every 100th step
        if (i + 1) % 100 != 0:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_steps}], Loss: {loss.item():.4f}')

# Measure classification accuracy on the test loader
model.eval()  # put the model in evaluation mode before scoring
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # The index of the largest score along dim 1 is the predicted class
        _, predicted = torch.max(outputs.data, 1)
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the test images: {100 * correct / total}%')

ModuleNotFoundError: No module named 'torch'

# Natural Language Processing (NLP) Projects:
## * Develop advanced NLP models for tasks like machine translation, text summarization, or language generation.
## * Work with transformer-based models like BERT or GPT-3.

In [2]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Checkpoint to load. Other GPT-2 sizes that can be used here:
# 'gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl'
model_name = 'gpt2-medium'

# Tokenizer and language-model head come from the same checkpoint;
# the model is switched to evaluation mode right after loading
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

# Fix PyTorch's random seed so repeated runs are reproducible
torch.manual_seed(42)

# Generate text based on a given prompt
def generate_text(prompt, max_length=100, temperature=0.7):
    """Continue `prompt` using the module-level GPT-2 `model` and `tokenizer`.

    Args:
        prompt: Text the model should continue.
        max_length: Upper bound, in tokens, on the returned sequence
            (prompt tokens plus generated tokens).
        temperature: Sampling temperature. A positive value enables sampling
            with that temperature; 0 or None falls back to greedy decoding.

    Returns:
        The decoded sequence (prompt followed by the continuation) with
        special tokens removed.
    """
    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which generate() needs to tell real tokens from padding.
    encoded = tokenizer(prompt, return_tensors='pt')

    # `temperature` is only honoured when sampling is switched on; without
    # do_sample=True generate() decodes greedily and ignores it.
    sampling_kwargs = {}
    if temperature and temperature > 0:
        sampling_kwargs = {'do_sample': True, 'temperature': temperature}

    output = model.generate(
        **encoded,
        max_length=max_length,
        num_return_sequences=1,
        # GPT-2 has no dedicated pad token; reuse EOS explicitly so
        # generate() does not have to guess one.
        pad_token_id=tokenizer.eos_token_id,
        **sampling_kwargs,
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

# Demo: continue a short story opening and print the result
prompt = "Once upon a time, in a land far far away"
generated_text = generate_text(
    prompt,
    max_length=150,
    temperature=0.7,
)
print(generated_text)


ModuleNotFoundError: No module named 'torch'

# Time Series Forecasting at Scale:
## * Build large-scale time series forecasting models for industries like finance, energy, or demand forecasting.
## * Implement distributed computing with technologies like Dask or Apache Spark

In [3]:
pip install dask[complete]
pip install pystan==2.19.1.1
pip install fbprophet

SyntaxError: invalid syntax (3099231369.py, line 1)

In [4]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster

# Create a local Dask cluster and connect a client to it.
# Both objects are closed explicitly once the forecasts have been computed.
cluster = LocalCluster()
client = Client(cluster)

# Generate synthetic time series data
def generate_time_series(start_date, end_date, freq='D'):
    """Build a random series in the two-column layout Prophet expects.

    Args:
        start_date: First timestamp of the range (inclusive).
        end_date: Last timestamp of the range (inclusive).
        freq: Pandas frequency string for the spacing between timestamps.

    Returns:
        A DataFrame with a 'ds' column of timestamps and a 'y' column of
        standard-normal samples drawn from NumPy's global random state.
    """
    timestamps = pd.date_range(start=start_date, end=end_date, freq=freq)
    noise = np.random.randn(timestamps.size)
    frame = pd.DataFrame({'ds': timestamps, 'y': noise})
    return frame

# Define function for parallel forecasting
def forecast_prophet(df):
    """Fit a default Prophet model on `df` and forecast 30 periods ahead.

    Args:
        df: Frame handed straight to `Prophet.fit`.

    Returns:
        The 'ds' and 'yhat' columns of the prediction made over the frame
        returned by `make_future_dataframe(periods=30)`.
    """
    m = Prophet()
    m.fit(df)
    # Frame of dates to predict for, extended by 30 periods
    horizon = m.make_future_dataframe(periods=30)
    prediction = m.predict(horizon)
    return prediction.loc[:, ['ds', 'yhat']]

# Generate synthetic time series data (you can replace this with your own data).
# The forecast below is run once per 'division', so every row must carry the
# key of the series it belongs to: one independent series is built per
# division and tagged with its name.
start_date = '2024-01-01'
end_date = '2024-12-31'
divisions = ['north', 'south', 'east', 'west']
time_series_data = pd.concat(
    [
        generate_time_series(start_date, end_date).assign(division=name)
        for name in divisions
    ],
    ignore_index=True,
)

# Convert the pandas DataFrame to a Dask DataFrame
dask_df = dd.from_pandas(time_series_data, npartitions=4)

try:
    # Perform parallel forecasting using Dask: one Prophet fit per division.
    # `meta` describes the columns forecast_prophet returns.
    forecast_df = dask_df.groupby('division').apply(
        forecast_prophet,
        meta={'ds': 'datetime64[ns]', 'yhat': 'float'},
    )

    # Compute the results
    forecast_df = forecast_df.compute()
finally:
    # Shutdown the Dask client and cluster even if the computation failed,
    # so no worker processes are left running.
    client.close()
    cluster.close()

# Print the forecasted values
print(forecast_df)


ModuleNotFoundError: No module named 'fbprophet'

# Recommendation Systems:
## * Create personalized recommendation systems using collaborative filtering or content-based approaches.
## * Explore matrix factorization or deep learning techniques for recommendations.

In [6]:
pip install scikit-surprise
from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Location of the MovieLens ratings file (replace file_path with your actual file path)
file_path = 'path/to/ml-100k/u.data'

# Each line is parsed as tab-separated "user item rating timestamp" fields
reader = Reader(sep='\t', line_format='user item rating timestamp')
data = Dataset.load_from_file(file_path, reader=reader)

# Hold out 20% of the ratings for testing; the seed fixes the split
trainset, testset = train_test_split(data, random_state=42, test_size=0.2)

# Fit an SVD model on the training portion
model = SVD()
model.fit(trainset)

# Score the held-out ratings and report the RMSE
predictions = model.test(testset)
accuracy.rmse(predictions)

# Generate recommendations for a given user
user_id = str(196)  # Replace with any user ID (IDs read from the file are strings)

# Items this user has already rated anywhere in the loaded data; they are
# excluded so the list only contains movies that are new to the user.
# Each raw rating starts with the user ID followed by the item ID.
rated_items = {row[1] for row in data.raw_ratings if row[0] == user_id}

# Every distinct item in the dataset, in first-seen order, minus the rated
# ones. Deriving the catalogue from the data avoids hard-coding its size.
candidate_items = [
    item_id
    for item_id in dict.fromkeys(row[1] for row in data.raw_ratings)
    if item_id not in rated_items
]

# predict() estimates one (user, item) rating without needing a placeholder
# "true" rating, so no made-up value shows up in the printed output.
user_predictions = [model.predict(user_id, item_id) for item_id in candidate_items]
recommended_movies = sorted(user_predictions, key=lambda p: p.est, reverse=True)[:10]  # Get top 10 recommendations

# Print recommended movies
print("Top 10 recommended movies for user", user_id)
for movie in recommended_movies:
    print(movie)



Collecting scikit-surprise
  Using cached scikit-surprise-1.1.3.tar.gz (771 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  ERROR: Error [WinError 225] Operation did not complete successfully because the file contains a virus or potentially unwanted software while executing command python setup.py egg_info
ERROR: Could not install packages due to an OSError: [WinError 225] Operation did not complete successfully because the file contains a virus or potentially unwanted software

