# Libraries

In [25]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVR


In [13]:
def load_housing(path='./housing.csv'):
    """Load the housing dataset from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to ``'./housing.csv'`` (relative to
        the current working directory), which is the file the notebook
        originally hard-coded.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents, one row per record.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pandas.read_csv`` when ``path`` does not exist.
    """
    return pd.read_csv(path)

In [14]:
# Display the loaded housing frame as the cell output (sanity check of the CSV contents).
load_housing()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25.0,1665.0,374.0,845.0,330.0,1.5603,78100.0,INLAND
20636,-121.21,39.49,18.0,697.0,150.0,356.0,114.0,2.5568,77100.0,INLAND
20637,-121.22,39.43,17.0,2254.0,485.0,1007.0,433.0,1.7000,92300.0,INLAND
20638,-121.32,39.43,18.0,1860.0,409.0,741.0,349.0,1.8672,84700.0,INLAND


In [19]:
# Keep a notebook-level copy of the dataset for the exploratory cells that follow.
# NOTE(review): this reads the same file as load_housing(); consider calling that helper instead.
df = pd.read_csv('./housing.csv')

In [22]:
# List the available column names (features plus the `median_house_value` target).
df.columns

Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value', 'ocean_proximity'],
      dtype='object')

In [26]:
# Smallest value of the target column, to get a feel for its range.
df.median_house_value.min()

14999.0

In [15]:
def get_model(algorithm):
    """Build an unfitted regressor for the given algorithm name.

    Parameters
    ----------
    algorithm : str
        One of ``'Linear Regression'``, ``'Random Forest Regressor'`` or
        ``'Support Vector Regressor'``.

    Returns
    -------
    estimator
        A new, unfitted scikit-learn regressor instance.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the supported names. (Previously an
        unknown name fell through every branch and failed with an
        ``UnboundLocalError`` on ``return model``.)
    """
    # Factories are wrapped in lambdas so that an estimator is only
    # constructed for the algorithm that was actually requested.
    factories = {
        'Linear Regression': lambda: LinearRegression(),
        # Fixed seed keeps the forest reproducible between runs.
        'Random Forest Regressor': lambda: RandomForestRegressor(n_estimators=100, random_state=42),
        'Support Vector Regressor': lambda: SVR(),
    }
    if algorithm not in factories:
        supported = ', '.join(repr(name) for name in factories)
        raise ValueError(f'Unknown algorithm {algorithm!r}; expected one of: {supported}')
    return factories[algorithm]()

In [24]:
def train_model(df, algorithm='Linear Regression'):
    """Train, evaluate and pickle a regressor that predicts ``median_house_value``.

    The raw feature frame contains the text column ``ocean_proximity``
    (values such as ``'NEAR BAY'`` / ``'INLAND'``), which scikit-learn
    regressors cannot consume directly. The estimator is therefore wrapped in
    a ``Pipeline`` that one-hot encodes that column and passes the numeric
    columns through a median imputer, so the returned model accepts frames
    with the *raw* feature columns both for evaluation and for prediction.

    Parameters
    ----------
    df : pandas.DataFrame
        Housing data containing the feature columns listed below and the
        ``median_house_value`` target.
    algorithm : str, optional
        Name understood by ``get_model``; defaults to ``'Linear Regression'``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted preprocessing + regressor pipeline (exposes ``predict``).

    Side effects
    ------------
    Calls ``evaluate_model`` on the held-out split and writes the fitted
    pipeline to ``<algorithm in snake case>_model.pkl`` in the working
    directory.
    """
    feature_columns = ['longitude', 'latitude', 'housing_median_age', 'total_rooms',
                       'total_bedrooms', 'population', 'households', 'median_income',
                       'ocean_proximity']
    categorical_columns = ['ocean_proximity']
    numeric_columns = [name for name in feature_columns if name not in categorical_columns]

    X = df[feature_columns]
    y = df['median_house_value']

    # Fixed seed so the 80/20 split is reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    preprocess = ColumnTransformer(
        transformers=[
            # Median imputation guards against missing numeric values, which
            # would otherwise make the regressors fail during fit.
            ('numeric', SimpleImputer(strategy='median'), numeric_columns),
            # Categories unseen during training encode as all zeros instead of raising.
            ('categorical', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
        ],
        sparse_threshold=0,  # always hand a dense matrix to the regressor
    )
    model = Pipeline(steps=[('preprocess', preprocess), ('regressor', get_model(algorithm))])
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test)

    # Save the model to a pickle file
    with open(f'{algorithm.lower().replace(" ", "_")}_model.pkl', 'wb') as model_file:
        pickle.dump(model, model_file)

    return model

In [None]:
def _regression_metrics(y_true, y_pred, n_features):
    """Compute the regression metrics reported by ``evaluate_model`` as a dict."""
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # Adjusted R-squared is undefined when there are not more samples than
    # features + 1; report NaN instead of dividing by zero.
    n_samples = len(y_true)
    dof = n_samples - n_features - 1
    if dof > 0:
        adj_r2 = 1 - ((1 - r2) * (n_samples - 1) / dof)
    else:
        adj_r2 = float('nan')

    return {
        'mse': mse,
        'rmse': np.sqrt(mse),
        'mae': mean_absolute_error(y_true, y_pred),
        'r2': r2,
        'adj_r2': adj_r2,
        'explained_var': explained_variance_score(y_true, y_pred),
    }


def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data and render the results in Streamlit.

    This function was previously defined twice: the first definition computed
    the metrics and discarded them, and the second (which shadowed the first)
    displayed variables it never defined. Both halves now live in one function.

    Parameters
    ----------
    model : estimator
        Fitted object exposing ``predict``.
    X_test : pandas.DataFrame or 2-D array
        Held-out features; ``X_test.shape[1]`` is used as the feature count
        for adjusted R-squared.
    y_test : array-like
        Held-out target values.

    Returns
    -------
    dict
        The computed metrics keyed by ``'mse'``, ``'rmse'``, ``'mae'``,
        ``'r2'``, ``'adj_r2'`` and ``'explained_var'``.
    """
    y_pred = model.predict(X_test)
    metrics = _regression_metrics(y_test, y_pred, X_test.shape[1])

    # Display metrics
    st.subheader('Model Evaluation Metrics:')
    st.write(f"Mean Squared Error (MSE): {metrics['mse']:.2f}")
    st.write(f"Root Mean Squared Error (RMSE): {metrics['rmse']:.2f}")
    st.write(f"Mean Absolute Error (MAE): {metrics['mae']:.2f}")
    st.write(f"R-squared (R²): {metrics['r2']:.4f}")
    st.write(f"Adjusted R-squared: {metrics['adj_r2']:.4f}")
    st.write(f"Explained Variance Score: {metrics['explained_var']:.4f}")

    # Display scatter plot and residual plot side by side
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    ax1.scatter(y_test, y_pred, alpha=0.7)
    ax1.set_title('Actual vs. Predicted')
    ax1.set_xlabel('Actual')
    ax1.set_ylabel('Predicted')

    residuals = y_test - y_pred
    ax2.scatter(y_test, residuals, alpha=0.7)
    ax2.set_title('Residuals')
    ax2.set_xlabel('Actual')
    ax2.set_ylabel('Residuals')
    st.pyplot(fig)
    # The app script is re-executed on every interaction; close the figure so
    # matplotlib does not accumulate open figures across reruns.
    plt.close(fig)

    return metrics

In [None]:
# Main Streamlit app
def main():
    """Streamlit entry point: train a regressor and predict a house price.

    Loads the housing data with ``load_housing``, lets the user pick an
    algorithm, trains it via ``train_model``, exposes one sidebar control per
    feature, and shows the predicted ``median_house_value`` when the
    'Predict' button is pressed.

    The previous version was copied from a weather app: it called the
    undefined ``load_weather_dataset`` / ``predict_apparent_price`` helpers,
    read weather columns that do not exist in the housing frame, and labelled
    the price with ``(km/h)``.
    """
    st.title('LA Prices predict')

    # Load housing dataset
    df = load_housing()

    # Select regression algorithm
    algorithm = st.sidebar.selectbox('Select Regression Algorithm',
                                     ['Linear Regression', 'Random Forest Regressor', 'Support Vector Regressor'])

    # Train the model
    model = train_model(df, algorithm)

    # Streamlit UI: one control per feature. The names and their order must
    # match the feature columns that train_model fits on.
    st.sidebar.header('User Input Features')
    numeric_features = ['longitude', 'latitude', 'housing_median_age', 'total_rooms',
                        'total_bedrooms', 'population', 'households', 'median_income']
    categorical_feature = 'ocean_proximity'

    user_input = {}
    for feature in numeric_features:
        column = df[feature]
        # Cast to plain floats so min, max and default share one type, as the
        # slider requires; pandas skips missing values in these reductions.
        user_input[feature] = st.sidebar.slider(feature, float(column.min()), float(column.max()),
                                                float(column.mean()))
    user_input[categorical_feature] = st.sidebar.selectbox(
        categorical_feature, sorted(df[categorical_feature].dropna().unique()))

    # Predict house price
    if st.sidebar.button('Predict'):
        # Single-row frame with the same column layout used for training.
        features = pd.DataFrame([user_input], columns=numeric_features + [categorical_feature])
        predicted_price = float(model.predict(features)[0])
        st.sidebar.success(f'Predicted price house: {predicted_price:.2f}')
        print(f'Predicted price house: {predicted_price:.2f}')

# Run the app only when this module is executed directly (not when imported).
if __name__ == '__main__':
    main()