### **1. House Price Prediction System**

In [None]:
from re import X
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model  import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Load the raw listings from the CSV file in the working directory.
df = pd.read_csv('house_data.csv')

#preprocessing: drop rows with any missing value, then exact duplicate rows
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

#returns non numeric(string) column names in the form of list
nonNumericData = df.select_dtypes(include=['object']).columns.tolist()
# NOTE(review): the label reads "numeric data" but the list holds the
# NON-numeric (object) columns; the text is kept so recorded output still matches.
print(f"numeric data, {nonNumericData}")

# convert categorical data into binary (one-hot) indicator columns
if nonNumericData:
    df = pd.get_dummies(df, columns=nonNumericData, drop_first=True)

if 'price' not in df.columns:
    raise KeyError("The 'price' column is not present in the DataFrame.")

# Features are every column except the target.
X = df.drop(columns=['price'])
Y = df['price']

# Ensure all feature columns are numeric.
# Every column is inspected (the previous check looked only at the first one).
# is_numeric_dtype also accepts bool, which is what get_dummies can emit for
# the indicator columns.
non_numeric_features = [
    col for col, dtype in X.dtypes.items()
    if not pd.api.types.is_numeric_dtype(dtype)
]
if non_numeric_features:
    raise ValueError(
        "Some features are still non-numeric. Check the dataset preprocessing. "
        f"Offending columns: {non_numeric_features}"
    )

#  20% of the data goes to the test set, and 80% goes to the training set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, Y_train)

# Score the model on the held-out rows only.
Y_pred = model.predict(X_test)

mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

print(f'Model Evaluation:\nMSE: {mse:.2f}, R-squared: {r2:.2f}')


numeric data, ['municipality_name', 'object_type_name']
Model Evaluation:
MSE: 149210817251.74, R-squared: 0.49


### **2. Salary Prediction System**

In [12]:
# Install Streamlit quietly (-q); the app script written in the next cell imports it
# and a later cell launches it with `streamlit run`.
!pip install streamlit -q

In [15]:
%%writefile salary_prediction_app.py
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import streamlit as st

# Load the dataset
df = pd.read_csv('salary_data.csv')

# Data preprocessing: drop rows with missing values, then exact duplicates
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

# One-hot encode whichever of the known categorical columns this file contains
categorical_columns = ['degree', 'location', 'job_title']
existing_columns = [col for col in categorical_columns if col in df.columns]
if existing_columns:
    df = pd.get_dummies(df, columns=existing_columns, drop_first=True)

if 'Salary' not in df.columns:
    raise KeyError("The 'Salary' column is not present in the DataFrame.")

X = df.drop(columns=['Salary'])
Y = df['Salary']

# Split the data (80% train / 20% test, fixed seed)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Train the model
model = LinearRegression()
model.fit(X_train, Y_train)

# Model evaluation: predict once and reuse the result for metrics and the plot
Y_pred = model.predict(X_test)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

# Streamlit app
st.title('Salary Prediction System')

st.write("### Model Evaluation")
st.write(f'MSE: {mse:.2f}')
st.write(f'R-squared: {r2:.2f}')

# Plot actual vs predicted salaries.
# An explicit figure is used instead of the global pyplot state: Streamlit
# re-runs this script on every interaction, and the global figure would keep
# accumulating lines from earlier runs.
st.write("### Actual vs Predicted Salaries")
fig, ax = plt.subplots()
ax.plot(Y_test.values, label='Actual')
ax.plot(Y_pred, label='Predicted')
# Both series are drawn against their position in the test set, not against
# years of experience, so the axis is labelled accordingly.
ax.set_xlabel('Test Sample')
ax.set_ylabel('Salary')
ax.set_title('Actual vs. Predicted Salaries')
ax.legend()
st.pyplot(fig)
plt.close(fig)  # release the figure so reruns do not pile up open figures

# User input for prediction
st.write("### Predict Salary")
feature_names = X.columns.tolist()
if len(feature_names) == 1:
    # Single-feature dataset: keep the original widget.
    # NOTE(review): assumes that one feature is years of experience — confirm against the CSV.
    user_inputs = {
        feature_names[0]: st.number_input('Years of Experience', min_value=0.0, max_value=50.0, value=5.0)
    }
else:
    # The model was fitted on several columns, so it needs a value for each
    # of them; a lone experience value would be rejected by predict().
    user_inputs = {
        col: st.number_input(f'Enter {col}:', float(X[col].min()), float(X[col].max()))
        for col in feature_names
    }

if st.button('Predict'):
    # A one-row frame with the training column names keeps the feature order
    # identical to what the model saw in fit().
    input_row = pd.DataFrame([user_inputs], columns=feature_names)
    prediction = model.predict(input_row)
    st.write(f'Predicted Salary: {prediction[0]:.2f}')

Overwriting salary_prediction_app.py


In [16]:
# Fetch ipv4.icanhazip.com quietly (-q) and write the response to stdout (-O -);
# the recorded output is this machine's public IPv4 address.
# NOTE(review): presumably needed as the access password on the localtunnel page — confirm.
!wget -q -O - ipv4.icanhazip.com

34.138.170.184


In [17]:
# Start the Streamlit app in the background (&) and, in the same shell line, open a
# localtunnel on port 8501 — the port Streamlit reports in the recorded output.
# The cell keeps running until it is interrupted.
!streamlit run salary_prediction_app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.138.170.184:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0Kyour url is: https://nasty-pianos-report.loca.lt
[34m  Stopping...[0m
^C


### **3. Stock Price Trend Prediction**

In [None]:
# Install Streamlit quietly (-q); app.py, written in the next cell, imports it.
# NOTE(review): app.py also imports yfinance, which is not installed here —
# presumably it is already present in the runtime; confirm.
!pip install streamlit -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m55.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
%%writefile app.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import yfinance as yf
import streamlit as st

def get_stock_data(ticker, period='5y'):
  """Download price history for `ticker` and return the modelling frame.

  Args:
    ticker: Symbol passed to `yf.Ticker`.
    period: History window passed to `Ticker.history`; defaults to '5y',
      the value that was previously hard-coded.

  Returns:
    DataFrame with the 'Close' and 'Volume' columns (rows containing missing
    values removed) plus a 'Day' column numbering the remaining rows 0..n-1.

  Raises:
    ValueError: If the download returns no rows, instead of failing later
      with a less helpful error during column selection or the split.
  """
  stock = yf.Ticker(ticker)
  history = stock.history(period=period)
  if history.empty:
    raise ValueError(f"No price history returned for ticker '{ticker}' (period='{period}').")
  df = history[['Close', 'Volume']].dropna()
  # 'Day' is a positional index over the cleaned rows, not a calendar date.
  df['Day'] = np.arange(len(df))
  return df

# Fit and evaluate on a single, hard-coded ticker.
ticker = 'AAPL'
df = get_stock_data(ticker)

# Target is the closing price; every remaining column is a feature.
Y = df['Close']
X = df.drop(columns=['Close'])

# Hold out 20% of the rows with a fixed seed.
# NOTE(review): the rows are ordered in time, and train_test_split shuffles by
# default, so test days are interleaved with training days — confirm this is
# the intended evaluation for a trend model.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

model = LinearRegression()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

r2 = r2_score(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)

# Goes to the server console (it shows up in the cell output of the run cell).
print(f'Model Evaluation:\nMSE: {mse:.2f}, R-squared: {r2:.2f}')

st.title('Stock Price Trend Prediction')
st.write(f'Stock: {ticker}')

# Full price history as a line, test-set predictions as points on top of it.
fig, ax = plt.subplots()
ax.plot(df['Day'], df['Close'], color='blue', label='Actual Prices')
ax.scatter(X_test['Day'], Y_pred, color='red', label='Predicted Prices')
ax.set_xlabel('Day')
ax.set_ylabel('Price')
ax.legend()
st.pyplot(fig)

def predict_price(day, volume, estimator=None):
  """Predict the closing price for a given day index and trading volume.

  The model is fitted on the feature frame whose columns are, in order,
  'Volume' then 'Day'. The input row is therefore assembled by column name
  rather than by argument position, so the two values cannot be swapped.

  Args:
    day: Value for the 'Day' feature.
    volume: Value for the 'Volume' feature.
    estimator: Fitted model to use; defaults to the module-level `model`.

  Returns:
    The first (only) element of the estimator's prediction for that row.
  """
  if estimator is None:
    estimator = model
  values = {'Day': day, 'Volume': volume}
  # feature_names_in_ is the column order recorded at fit time; fall back to
  # the order produced by the training code when it is not available.
  columns = list(getattr(estimator, 'feature_names_in_', ('Volume', 'Day')))
  input_data = pd.DataFrame([[values[name] for name in columns]], columns=columns)
  return estimator.predict(input_data)[0]

# Sidebar inputs: the day index may run up to 30 past the last observed day,
# while the volume is limited to the range observed in the downloaded data.
first_day, last_day = int(df['Day'].min()), int(df['Day'].max())
lowest_volume, highest_volume = int(df['Volume'].min()), int(df['Volume'].max())

day = st.sidebar.number_input('Enter Future Day:', min_value=first_day, max_value=last_day + 30)
volume = st.sidebar.number_input('Enter Expected Volume:', min_value=lowest_volume, max_value=highest_volume)

# The model is only queried once the user presses the button.
predict_clicked = st.sidebar.button('Predict')
if predict_clicked:
  prediction = predict_price(day, volume)
  st.sidebar.write(f'Predicted Stock Price: ${prediction:.2f}')


Writing app.py


In [None]:
# Fetch ipv4.icanhazip.com quietly (-q) and write the response to stdout (-O -);
# the recorded output is this machine's public IPv4 address.
# NOTE(review): presumably needed as the access password on the localtunnel page — confirm.
!wget -q -O - ipv4.icanhazip.com

35.229.174.126


In [None]:
# Start the Streamlit app in the background (&) and, in the same shell line, open a
# localtunnel on port 8501 — the port Streamlit reports in the recorded output.
# The app's print() output also lands in this cell's output.
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.229.174.126:8501[0m
[0m
[1G[0K⠧[1G[0K⠇[1G[0K[1G[0JNeed to install the following packages:
localtunnel@2.0.2
Ok to proceed? (y) [20Gy

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0Kyour url is: https://poor-sites-tease.loca.lt
Model Evaluation:
MSE: 268.57, R-squared: 0.84
Model Evaluation:
MSE: 268.57, R-squared: 0.84
Model Evaluation:
MSE: 268.57, R-squared: 0.84
[34m  Stopping...[0m
^C


### **4. Customer Churn Prediction for a Subscription Service**

In [None]:
%%writefile customer_churn_app.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import streamlit as st

# Load the data and drop rows with any missing value.
df = pd.read_csv('customer_churn.csv')
df.dropna(inplace=True)

# Drop unnecessary columns
df.drop(columns=['Names', 'Location', 'Company', 'Onboard_date'], inplace=True)

# Everything except the label is used as a feature.
X = df.drop(columns=['Churn'])
Y = df['Churn']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, Y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(Y_test, y_pred)
class_report = classification_report(Y_test, y_pred)

# Console output; it appears in the output of the cell that runs the app.
print(f'Accuracy: {accuracy:.2f}\n')
print(f'Classification Report:\n{class_report}\n')

# Streamlit app
st.title('Customer Churn Prediction')
st.write(f'Accuracy: {accuracy:.2f}')

# Draw the confusion matrix on its own figure. Streamlit re-runs this script on
# every interaction, so drawing on the global pyplot figure would stack a new
# heatmap (and colorbar) on top of the previous one each time.
fig, ax = plt.subplots()
sns.heatmap(
    confusion_matrix(Y_test, y_pred),
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['No Churn', 'Churn'],
    yticklabels=['No Churn', 'Churn'],
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
st.pyplot(fig)
plt.close(fig)  # release the figure so reruns do not pile up open figures

# Sidebar for user input: one numeric field per feature, bounded by the observed range
st.sidebar.header('Predict Customer Churn')
features = {col: st.sidebar.number_input(f'Enter {col}:', float(X[col].min()), float(X[col].max())) for col in X.columns}

# Predict button
if st.sidebar.button('Predict'):
  # One-row frame with the training column names, so the features reach the
  # model in the order (and under the names) it was fitted with.
  input_data = pd.DataFrame([features], columns=X.columns)

  prediction = model.predict(input_data)[0]
  st.sidebar.write(f'Predicted Churn: {"Yes" if prediction else "No"}')

  # NOTE(review): this table does not depend on the sidebar values, yet it is
  # only displayed after the button is pressed — confirm that is intended.
  high_risk_customers = X_test[y_pred == 1]  # Customers predicted to churn
  st.write("High-Risk Customers:")
  st.write(high_risk_customers)


Overwriting customer_churn_app.py


In [None]:
# Fetch ipv4.icanhazip.com quietly (-q) and write the response to stdout (-O -);
# the recorded output is this machine's public IPv4 address.
# NOTE(review): presumably needed as the access password on the localtunnel page — confirm.
!wget -q -O - ipv4.icanhazip.com

34.75.23.249


In [None]:
# Start the Streamlit app in the background (&) and, in the same shell line, open a
# localtunnel on port 8501 — the port Streamlit reports in the recorded output.
# The app's print() output also lands in this cell's output.
!streamlit run customer_churn_app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.75.23.249:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0Kyour url is: https://cool-icons-drum.loca.lt
Accuracy: 0.90

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.96      0.94       148
           1       0.77      0.62      0.69        32

    accuracy                           0.90       180
   macro avg       0.85      0.79      0.82       180
weighted avg       0.89      0.90      0.90       180


Accuracy: 0.90

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.96      0.94       148
           1       0.77      0.62      0

### **5. Energy Consumption Prediction**

In [5]:
# Install Streamlit quietly (-q); the app script written in the next cell imports it
# and a later cell launches it with `streamlit run`.
!pip install streamlit -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m87.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [9]:
%%writefile energy_consumption_app.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import streamlit as st

df = pd.read_csv('owid-energy-data.csv')

# Identify the correct column for energy consumption
st.write("Dataset Columns: ",df.columns.tolist())
energy_columns = [col for col in df.columns if "consumption" in col.lower()]

if not energy_columns:
  raise KeyError("No column related to energy consumption found in the dataset.")

# Use the first identified energy consumption column
# NOTE(review): any other "consumption" columns stay in the feature set. The
# recorded run reports R-squared 1.00, which suggests the target may be
# recoverable from closely related columns — confirm before relying on the score.
energy_column = energy_columns[0]
st.write(f"Using '{energy_column}' as the target variable.")

# Keep only rows that have a value in every column.
df = df.dropna()
if df.empty:
  # Without this guard the split below fails with a less helpful error.
  st.error("No complete rows remain after dropping missing values; the model cannot be trained.")
  st.stop()

# One-hot encode the text columns so every feature is numeric.
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
if categorical_cols:
  df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

X = df.drop(columns=[energy_column])
Y = df[energy_column]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, Y_train)

Y_pred = model.predict(X_test)

mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

# Console output; it appears in the output of the cell that runs the app.
print(f'Model Evaluation:\nMSE: {mse:.2f}, R-squared: {r2:.2f}')

st.title('Energy Consumption Prediction')
st.write(f'MSE: {mse:.2f}, R-squared: {r2:.2f}')

# Predicted vs. actual; the red diagonal marks a perfect prediction.
fig, ax = plt.subplots()
ax.scatter(Y_test, Y_pred, alpha=0.5, color='blue')
ax.plot([Y_test.min(), Y_test.max()], [Y_test.min(), Y_test.max()], 'r', lw=2)
ax.set_xlabel('Actual Energy Consumption')
ax.set_ylabel('Predicted Energy Consumption')
ax.set_title('Actual vs. Predicted Energy Consumption')
st.pyplot(fig)

# User input for prediction: one numeric field per feature, bounded by the observed range
st.sidebar.header('Predict Energy Consumption')
features = {col: st.sidebar.number_input(f'Enter {col}:', float(df[col].min()), float(df[col].max())) for col in X.columns}
if st.sidebar.button('Predict'):
  # One-row frame with the training column names, so the features reach the
  # model in the order (and under the names) it was fitted with.
  input_data = pd.DataFrame([features], columns=X.columns)

  prediction = model.predict(input_data)[0]
  # NOTE(review): the 'kWh' unit is not established anywhere in this script —
  # confirm the unit of the selected target column.
  st.sidebar.write(f'Predicted Energy Consumption: {prediction:.2f} kWh')


Writing energy_consumption_app.py


In [10]:
# Fetch ipv4.icanhazip.com quietly (-q) and write the response to stdout (-O -);
# the recorded output is this machine's public IPv4 address.
# NOTE(review): presumably needed as the access password on the localtunnel page — confirm.
!wget -q -O - ipv4.icanhazip.com

34.138.170.184


In [11]:
# Start the Streamlit app in the background (&) and, in the same shell line, open a
# localtunnel on port 8501 — the port Streamlit reports in the recorded output.
# The app's print() output also lands in this cell's output.
!streamlit run energy_consumption_app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.138.170.184:8501[0m
[0m
[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K[1G[0JNeed to install the following packages:
localtunnel@2.0.2
Ok to proceed? (y) [20Gy

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0Kyour url is: https://orange-games-float.loca.lt
Model Evaluation:
MSE: 2.04, R-squared: 1.00
Model Evaluation:
MSE: 2.04, R-squared: 1.00
[34m  Stopping...[0m
^C
