In [1]:
import pandas as pd
import numpy as np
import logging
import os
import pandas as pd
from torch.utils.data import Dataset
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [2]:
logging.basicConfig(level=logging.INFO)

In [3]:
class DataLoader:
    def __init__(self, file_path, test_size=0.2, val_size=0.1):
        self.file_path = file_path
        self.test_size = test_size
        self.val_size = val_size

    def load_and_split_data(self):
        try:
            data = pd.read_csv(self.file_path)
            # Splitting the data into train, validation, and test sets
            train_data, test_data = train_test_split(data, test_size=self.test_size, random_state=42)
            train_data, val_data = train_test_split(train_data, test_size=self.val_size / (1 - self.test_size), random_state=42)
            logging.info("Data loaded and split successfully.")
            return train_data, val_data, test_data
        except Exception as e:
            logging.error(f"Error loading and splitting data: {e}")
            raise

In [4]:
class DataPreprocessor:
    def __init__(self, data):
        self.data = data

    def preprocess(self):
        # Implement preprocessing steps like normalization, handling missing values etc.
        # Example: self.data = (self.data - np.mean(self.data)) / np.std(self.data)
        logging.info("Data preprocessing completed.")
        return self.data

In [5]:
class PyTorchModel(nn.Module):
    def __init__(self, input_size):
        super(PyTorchModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 1)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [6]:
class ModelBuilder:
    def build_model(self, input_size):
        model = PyTorchModel(input_size)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.MSELoss()
        logging.info(f"PyTorch model built successfully. Optimizer: {optimizer}, Loss: {criterion}")
        return model, optimizer, criterion

In [7]:
class CustomWeatherDataset(Dataset):
    def __init__(self, csv_file, transform=None, target_transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            transform (callable, optional): Optional transform to be applied on a sample.
            target_transform (callable, optional): Optional transform to be applied on the target.
        """
        self.weather_data = pd.read_csv(csv_file)
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.weather_data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Assuming the last column is the target
        features = self.weather_data.iloc[idx, :-1].values
        target = self.weather_data.iloc[idx, -1]

        # Convert to tensor
        features = torch.tensor(features, dtype=torch.float32)
        target = torch.tensor(target, dtype=torch.float32)

        if self.transform:
            features = self.transform(features)

        if self.target_transform:
            target = self.target_transform(target)

        return features, target

In [8]:
class Trainer:
    def __init__(self, model, optimizer, criterion, train_csv, test_csv, batch_size=32, epochs=10):
        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
        self.epochs = epochs
        self.batch_size = batch_size

        # Load datasets
        self.train_data = CustomWeatherDataset(csv_file=train_csv)
        self.test_data = CustomWeatherDataset(csv_file=test_csv)

        # Create data loaders
        self.train_loader = DataLoader(self.train_data, batch_size=self.batch_size, shuffle=True)
        self.test_loader = DataLoader(self.test_data, batch_size=self.batch_size)

        logging.info("Data loaders created successfully.")

    def train(self):
        self.model.train()
        for epoch in range(self.epochs):
            running_loss = 0.0
            for features, labels in self.train_loader:
                self.optimizer.zero_grad()
                outputs = self.model(features)
                loss = self.criterion(outputs.squeeze(), labels)
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
            average_loss = running_loss / len(self.train_loader)
            logging.info(f"Epoch {epoch+1}/{self.epochs}, Loss: {average_loss:.4f}")

    def evaluate(self):
        self.model.eval()
        total_loss = 0.0
        with torch.no_grad():
            for features, labels in self.test_loader:
                outputs = self.model(features)
                loss = self.criterion(outputs.squeeze(), labels)
                total_loss += loss.item()
        average_loss = total_loss / len(self.test_loader)
        logging.info(f"Test Loss: {average_loss:.4f}")

    def visualize_batch(self):
        train_features, train_labels = next(iter(self.train_loader))
        logging.info(f"Feature batch shape: {train_features.size()}")
        logging.info(f"Labels batch shape: {train_labels.size()}")
        example_features = train_features[0]
        example_label = train_labels[0]
        plt.plot(example_features.numpy(), label='Features')
        plt.title(f'Example Weather Data Features with Label: {example_label.item()}')
        plt.xlabel('Feature Index')
        plt.ylabel('Feature Value')
        plt.legend()
        plt.show()

In [9]:
class PredictionVisualizer:
    def __init__(self, model, data):
        self.model = model
        self.data = data

    def visualize_future_forecast(self, future_data):
        predictions = self.model.predict(future_data)
        plt.figure(figsize=(10, 6))
        plt.plot(predictions)
        plt.title("Future Temperature Forecast")
        plt.xlabel("Time")
        plt.ylabel("Temperature")
        plt.savefig("future_forecast.png")
        plt.show()

    def compare_actual_vs_predicted(self, test_data):
        actual = test_data['target']
        predicted = self.model.predict(test_data.drop('target', axis=1))
        plt.figure(figsize=(10, 6))
        plt.plot(actual, label='Actual')
        plt.plot(predicted, label='Predicted')
        plt.title("Comparison of Actual vs Predicted Temperatures")
        plt.xlabel("Time")
        plt.ylabel("Temperature")
        plt.legend()
        plt.savefig("actual_vs_predicted.png")
        plt.show()

In [10]:
class ReportGenerator:
    def __init__(self, actual, predicted):
        self.actual = actual
        self.predicted = predicted

    def generate_report(self):
        report_df = pd.DataFrame({
            'Actual': self.actual,
            'Predicted': self.predicted.squeeze(),
            'Difference': self.actual - self.predicted.squeeze()
        })
        report_df['Error'] = report_df['Difference'].abs()
        report_df['Squared Error'] = report_df['Error'] ** 2

        # Additional statistics
        report_df['Mean Actual'] = self.actual.mean()
        report_df['Mean Predicted'] = self.predicted.mean()
        report_df['Standard Deviation Actual'] = self.actual.std()
        report_df['Standard Deviation Predicted'] = self.predicted.std()

        report_df.to_csv('report.csv', index=False)
        logging.info("Report generated and saved as report.csv.")

In [11]:
def main():
    file_path = 'example_temperature_data.csv'
    
    # Initialize DataLoader and split data
    data_loader = DataLoader(file_path, test_size=0.2, val_size=0.1)
    train_data, val_data, test_data = data_loader.load_and_split_data()
    
    # Preprocess data
    preprocessor = DataPreprocessor(train_data)
    processed_data = preprocessor.preprocess()

    # Assuming your data has features and a target column
    input_size = train_data.drop('target', axis=1).shape[1]  # Adjust 'target' as per your dataset

    # Build and train the model
    builder = ModelBuilder()
    model, optimizer, criterion = builder.build_model(input_size)
    trainer = Trainer(model, optimizer, criterion, train_data, val_data, test_data, batch_size=32, epochs=10)
    trainer.train()
    trainer.evaluate()

    # Visualization and report generation (optional)
    # These steps would require additional data or modifications
    # depending on your specific use case and available data

    # Example usage (modify as needed):
    # visualizer = PredictionVisualizer(model, processed_data)
    # future_data = ... # Load or create your future data for prediction
    # visualizer.visualize_future_forecast(future_data)
    # test_data = ... # Subset of processed_data or separate test data
    # visualizer.compare_actual_vs_predicted(test_data)

    # Generate report (modify as needed):
    # actual_values = ... # Actual values from your dataset
    # predicted_values = model.predict(...) # Predictions from your model
    # report_generator = ReportGenerator(actual_values, predicted_values)
    # report_generator.generate_report()

if __name__ == "__main__":
    main()


INFO:root:Data loaded and split successfully.


NameError: name 'data' is not defined