In [1]:
pip install gspread pandas scikit-learn oauth2client

Collecting gspread
  Downloading gspread-6.1.4-py3-none-any.whl.metadata (11 kB)
Collecting oauth2client
  Downloading oauth2client-4.1.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting google-auth>=1.12.0 (from gspread)
  Downloading google_auth-2.36.0-py2.py3-none-any.whl.metadata (4.7 kB)
Collecting google-auth-oauthlib>=0.4.1 (from gspread)
  Downloading google_auth_oauthlib-1.2.1-py2.py3-none-any.whl.metadata (2.7 kB)
Collecting httplib2>=0.9.1 (from oauth2client)
  Downloading httplib2-0.22.0-py3-none-any.whl.metadata (2.6 kB)
Collecting rsa>=3.1.4 (from oauth2client)
  Downloading rsa-4.9-py3-none-any.whl.metadata (4.2 kB)
Collecting requests-oauthlib>=0.7.0 (from google-auth-oauthlib>=0.4.1->gspread)
  Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl.metadata (11 kB)
Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.7.0->google-auth-oauthlib>=0.4.1->gspread)
  Downloading oauthlib-3.2.2-py3-none-any.whl.metadata (7.5 kB)
Downloading gspread-6.1.4-py3-none-any.whl 

In [1]:
pip install pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install gspread oauth2client


Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np
import time
import os
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, mean_squared_error

# Function to read and preprocess data
def read_and_preprocess_data(file_path):
    """Load the CSV at ``file_path`` and split it into features and two targets.

    The column names are printed as read (for debugging) and then stripped of
    surrounding whitespace. 'Room Occupancy' is integer-encoded with a
    ``LabelEncoder`` that is fitted anew on every call, so the codes reflect
    only the labels present in the file at that moment.

    Parameters
    ----------
    file_path : str
        Location of the CSV file.

    Returns
    -------
    tuple
        ``(X, y_class, y_reg)`` where ``X`` holds the 'People', 'Temperature'
        and 'Humidity' columns, ``y_class`` is the encoded 'Room Occupancy'
        column and ``y_reg`` is the 'Time Occupied' column.
        ``(None, None, None)`` is returned when ``file_path`` does not exist.

    Raises
    ------
    KeyError
        If a required column is absent; the first missing one is named.
    """
    if os.path.exists(file_path):
        frame = pd.read_csv(file_path)
    else:
        print(f"File not found: {file_path}")
        return None, None, None

    # Show the raw header first so stray whitespace is visible when debugging.
    print("Columns in the DataFrame:", frame.columns.tolist())
    frame.columns = frame.columns.str.strip()

    feature_names = ['People', 'Temperature', 'Humidity']
    class_target = 'Room Occupancy'
    reg_target = 'Time Occupied'

    # Validate in a fixed order so the error always names the first gap.
    expected = [class_target, *feature_names, reg_target]
    first_missing = next((name for name in expected if name not in frame.columns), None)
    if first_missing is not None:
        raise KeyError(f"Column '{first_missing}' is not in the DataFrame.")

    # Turn the (possibly categorical) occupancy labels into integer codes.
    frame[class_target] = LabelEncoder().fit_transform(frame[class_target])

    return frame[feature_names], frame[class_target], frame[reg_target]

# Function to train models
def train_models(X, y_class, y_reg, test_size=0.2, random_state=42, n_estimators=100):
    """Fit a random-forest classifier and regressor on one shared train/test split.

    Parameters
    ----------
    X : feature table used by both models.
    y_class : classification target aligned row-for-row with ``X``.
    y_reg : regression target aligned row-for-row with ``X``.
    test_size : value forwarded to ``train_test_split`` (default 0.2).
    random_state : seed applied to the split and to both forests (default 42).
    n_estimators : number of trees in each forest (default 100).

    Returns
    -------
    tuple
        ``(classifier, regressor, X_test, y_class_test, y_reg_test, X_train)``.
        The training targets are used for fitting and are not returned.
    """
    # Split all three arrays in one call so features and both targets stay aligned.
    X_train, X_test, y_class_train, y_class_test, y_reg_train, y_reg_test = train_test_split(
        X, y_class, y_reg, test_size=test_size, random_state=random_state)

    # Train Random Forest Classifier
    classifier = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    classifier.fit(X_train, y_class_train)

    # Train Random Forest Regressor
    regressor = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    regressor.fit(X_train, y_reg_train)

    return classifier, regressor, X_test, y_class_test, y_reg_test, X_train

# Function to make predictions
def make_predictions(classifier, regressor, X):
    """Run both fitted models on ``X``.

    The classifier is invoked first, then the regressor.

    Returns
    -------
    tuple
        ``(class_predictions, reg_predictions)`` as produced by each model's
        ``predict`` method.
    """
    return classifier.predict(X), regressor.predict(X)

# Main loop to read data in real-time
file_path = 'data.csv'  # Path to your CSV file
classifier, regressor = None, None

# Initial training: load the CSV once, fit both models and report how they
# perform on the held-out split returned by train_models.
try:
    X, y_class, y_reg = read_and_preprocess_data(file_path)
    if X is not None and y_class is not None and y_reg is not None:
        classifier, regressor, X_test, y_class_test, y_reg_test, X_train = train_models(X, y_class, y_reg)

        # Score the models on rows they were not fitted on, so there is some
        # evidence of quality before predictions are trusted.
        holdout_accuracy = accuracy_score(y_class_test, classifier.predict(X_test))
        holdout_mse = mean_squared_error(y_reg_test, regressor.predict(X_test))
        print(f'Hold-out accuracy for Room Occupancy: {holdout_accuracy:.3f}')
        print(f'Hold-out MSE for Time Occupied: {holdout_mse:.3f}')

        # Provide a random test sample from the training data
        random_index = np.random.randint(0, len(X_train))
        print(f'Random Test Sample from Training Data: {X_train.iloc[random_index].to_dict()}')
    else:
        print("Initial data loading failed. Exiting.")
        # Raise SystemExit explicitly: the bare `exit` name is a site/IPython
        # convenience object and, under a Jupyter kernel, calling it does not
        # raise, so execution would continue past this point.
        raise SystemExit(1)

except Exception as e:
    # SystemExit is not an Exception subclass, so the exit above passes through.
    print(f"An error occurred during initial training: {e}")
    raise SystemExit(1)

# Real-time data processing loop
# Polling without fitted models would only print the same error forever,
# so fail once, loudly, before entering the loop.
if classifier is None or regressor is None:
    raise RuntimeError("Models are not trained; run the initial training step first.")

POLL_INTERVAL_SECONDS = 5  # Delay between successive reads of the CSV file

try:
    while True:
        try:
            print("Reading real-time data...")
            X_real_time, _, _ = read_and_preprocess_data(file_path)

            if X_real_time is not None:
                # Make predictions on the real-time data
                class_predictions, reg_predictions = make_predictions(classifier, regressor, X_real_time)

                # Print predictions
                print(f'Predictions for Room Occupancy: {class_predictions}')
                print(f'Predictions for Time Occupied: {reg_predictions}')
            else:
                print("Real-time data loading failed.")

        except Exception as e:
            # Best-effort: a bad read (e.g. a half-written file) should not end
            # the loop; report it and try again on the next cycle.
            print(f"An error occurred during real-time processing: {e}")

        time.sleep(POLL_INTERVAL_SECONDS)  # Wait before reading the file again
except KeyboardInterrupt:
    # Interrupting the kernel (or Ctrl+C) is the only way out of the loop;
    # end with a short message instead of a traceback.
    print("Real-time processing stopped by user.")
