<a href="https://colab.research.google.com/github/AqueeqAzam/disease-diagnosis-project-using-expert-system-ml-and-nlp-flask/blob/main/disease_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# `Disease Prediction using an expert system, machine learning and NLP with a Flask server`

`generate dataset`

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Seed every random step so that Restart & Run All regenerates the same CSV.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the number of rows
n_rows = 10000

# Define the columns (four binary symptom flags followed by the label)
columns = ["Fever", "Cough", "Headache", "Fatigue", "Disease"]
feature_columns = columns[:-1]

# Ensure balanced classes: every disease receives the same number of rows
diseases = ["COVID-19", "Flu", "Common Cold", "Healthy"]
rows_per_disease = n_rows // len(diseases)

# Draw all symptom flags in one vectorised call instead of 40,000 scalar
# np.random.choice calls.
# NOTE: each flag is a fair coin flip drawn independently of the disease label,
# so the features carry no signal and a classifier trained on this file can
# only reach chance accuracy (about 25% with four balanced classes).
symptom_flags = rng.integers(
    0, 2, size=(rows_per_disease * len(diseases), len(feature_columns))
)

# Create the DataFrame
df = pd.DataFrame(symptom_flags, columns=feature_columns)
df["Disease"] = np.repeat(diseases, rows_per_disease)

# Shuffle the data (seeded, so the row order is reproducible)
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Save to CSV
df.to_csv("medical.csv", index=False)


`Machine learning + Expert System`

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Seed every random step (data generation, shuffle, split, tree) so that
# Restart & Run All reproduces the printed metrics.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the number of rows
n_rows = 10000

# Define the columns (four binary symptom flags followed by the label)
columns = ["Fever", "Cough", "Headache", "Fatigue", "Disease"]
feature_columns = columns[:-1]

# Ensure balanced classes: every disease receives the same number of rows
diseases = ["COVID-19", "Flu", "Common Cold", "Healthy"]
rows_per_disease = n_rows // len(diseases)

# NOTE: each flag is a fair coin flip drawn independently of the disease label,
# so the features carry no signal and the accuracy printed below can only be
# about chance (25% with four balanced classes).
symptom_flags = rng.integers(
    0, 2, size=(rows_per_disease * len(diseases), len(feature_columns))
)

# Create the DataFrame
df = pd.DataFrame(symptom_flags, columns=feature_columns)
df["Disease"] = np.repeat(diseases, rows_per_disease)

# Shuffle the data
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Save to CSV
df.to_csv("medical.csv", index=False)

# Load the dataset
df = pd.read_csv('medical.csv')

# Split the dataset into features and target variable
X = df.drop('Disease', axis=1)  # Features
y = df['Disease']  # Target variable

# Convert boolean features to integers
X = X.astype(int)

# Split first, then fit the scaler on the training rows only, so that no
# statistics of the test rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so that code expecting the fully scaled matrix still finds it.
X_scaled = scaler.transform(X)

# Initialize the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=SEED)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Generate a classification report
report = classification_report(y_test, y_pred)
print(report)

def expert_system_rules(symptoms):
    """Diagnose from a symptom dict, preferring hand-written rules over the model.

    Args:
        symptoms: mapping with the keys 'Fever', 'Cough', 'Headache' and
            'Fatigue'; a value of 1 marks the symptom as present.

    Returns:
        The disease of the first matching rule; when no rule matches, the
        prediction of the module-level ``clf`` on the ``scaler``-transformed
        symptoms.
    """
    # Ordered (required symptoms, diagnosis) pairs: the first rule whose
    # symptoms are all present wins.
    rule_table = (
        (('Fever', 'Cough'), 'COVID-19'),
        (('Cough', 'Fatigue'), 'Flu'),
        (('Cough', 'Headache'), 'Common Cold'),
    )
    for required, diagnosis in rule_table:
        if all(symptoms[name] == 1 for name in required):
            return diagnosis

    # No rule fired: defer to the trained classifier.
    single_row = pd.DataFrame([symptoms])
    return clf.predict(scaler.transform(single_row))[0]

def get_symptoms_from_text(input_text):
    """Flag each known symptom whose name occurs anywhere in the text.

    Matching is a case-insensitive substring test, so "coughing" counts as
    'Cough' and a negated mention such as "no fever" still sets 'Fever'.

    Args:
        input_text: free-text description typed by the user.

    Returns:
        Dict with the keys 'Fever', 'Cough', 'Headache' and 'Fatigue', each
        mapped to 1 (mentioned) or 0 (not mentioned).
    """
    lowered = input_text.lower()
    symptom_names = ('Fever', 'Cough', 'Headache', 'Fatigue')
    return {name: 1 if name.lower() in lowered else 0 for name in symptom_names}

def predict_disease(input_text):
    """Return the diagnosis for a free-text symptom description.

    The text is turned into symptom flags by ``get_symptoms_from_text`` and
    those flags are passed to ``expert_system_rules``.
    """
    return expert_system_rules(get_symptoms_from_text(input_text))

# Interactive demo: reads one line of symptoms from the user and prints the
# diagnosis. The recorded cell output shows that this guard passes when the
# cell is executed in the notebook.
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter your symptoms (e.g., 'I have fever and cough'): ")

    # Predict disease based on user input
    predicted_disease = predict_disease(user_input)
    print(f'Predicted Disease: {predicted_disease}')


Accuracy: 25.00%
              precision    recall  f1-score   support

    COVID-19       0.24      0.30      0.27       747
 Common Cold       0.25      0.28      0.26       733
         Flu       0.30      0.22      0.25       759
     Healthy       0.22      0.20      0.21       761

    accuracy                           0.25      3000
   macro avg       0.25      0.25      0.25      3000
weighted avg       0.25      0.25      0.25      3000

Enter your symptoms (e.g., 'I have fever and cough'): fever, cough
Predicted Disease: COVID-19


`on adding nlp`

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import spacy

# Load the spaCy model
nlp = spacy.load('en_core_web_sm')

# Seed every random step (data generation, shuffle, split, tree) so that
# Restart & Run All reproduces the printed metrics.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the number of rows
n_rows = 10000

# Define the columns (four binary symptom flags followed by the label)
columns = ["Fever", "Cough", "Headache", "Fatigue", "Disease"]
feature_columns = columns[:-1]

# Ensure balanced classes: every disease receives the same number of rows
diseases = ["COVID-19", "Flu", "Common Cold", "Healthy"]
rows_per_disease = n_rows // len(diseases)

# NOTE: each flag is a fair coin flip drawn independently of the disease label,
# so the features carry no signal and the accuracy printed below can only be
# about chance (25% with four balanced classes).
symptom_flags = rng.integers(
    0, 2, size=(rows_per_disease * len(diseases), len(feature_columns))
)

# Create the DataFrame
df = pd.DataFrame(symptom_flags, columns=feature_columns)
df["Disease"] = np.repeat(diseases, rows_per_disease)

# Shuffle the data
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Save to CSV
df.to_csv("medical.csv", index=False)

# Load the dataset
df = pd.read_csv('medical.csv')

# Split the dataset into features and target variable
X = df.drop('Disease', axis=1)  # Features
y = df['Disease']  # Target variable

# Convert boolean features to integers
X = X.astype(int)

# Split first, then fit the scaler on the training rows only, so that no
# statistics of the test rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so that code expecting the fully scaled matrix still finds it.
X_scaled = scaler.transform(X)

# Initialize the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=SEED)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Generate a classification report
report = classification_report(y_test, y_pred)
print(report)

def expert_system_rules(symptoms):
    """Diagnose from a symptom dict, preferring hand-written rules over the model.

    Args:
        symptoms: mapping with the keys 'Fever', 'Cough', 'Headache' and
            'Fatigue'; a value of 1 marks the symptom as present.

    Returns:
        The disease of the first matching rule; when no rule matches, the
        prediction of the module-level ``clf`` on the ``scaler``-transformed
        symptoms.
    """
    # Ordered (required symptoms, diagnosis) pairs: the first rule whose
    # symptoms are all present wins.
    rule_table = (
        (('Fever', 'Cough'), 'COVID-19'),
        (('Cough', 'Fatigue'), 'Flu'),
        (('Cough', 'Headache'), 'Common Cold'),
    )
    for required, diagnosis in rule_table:
        if all(symptoms[name] == 1 for name in required):
            return diagnosis

    # No rule fired: defer to the trained classifier.
    single_row = pd.DataFrame([symptoms])
    return clf.predict(scaler.transform(single_row))[0]

def get_symptoms_from_text(input_text):
    """Flag each known symptom mentioned in the text, using spaCy tokens.

    A symptom counts as mentioned when a token's text or its lemma equals the
    lower-cased symptom name. Checking the lemma lets inflected forms such as
    "coughing" or "headaches" match; comparing the token text alone missed
    them. Negation ("no fever") is not detected.

    Args:
        input_text: free-text description typed by the user.

    Returns:
        Dict with the keys 'Fever', 'Cough', 'Headache' and 'Fatigue', each
        mapped to 1 (mentioned) or 0 (not mentioned).
    """
    # Initialize symptoms dictionary
    symptoms = {'Fever': 0, 'Cough': 0, 'Headache': 0, 'Fatigue': 0}
    # Lower-cased surface form -> dictionary key, e.g. 'fever' -> 'Fever'.
    known_forms = {name.lower(): name for name in symptoms}

    # Process the text with spaCy
    for token in nlp(input_text.lower()):
        # The text is checked as well, so matching still works when the
        # pipeline provides no lemma for a token.
        for form in (token.text, token.lemma_):
            if form in known_forms:
                symptoms[known_forms[form]] = 1

    return symptoms

def predict_disease(input_text):
    """Return the diagnosis for a free-text symptom description.

    The text is turned into symptom flags by ``get_symptoms_from_text`` and
    those flags are passed to ``expert_system_rules``.
    """
    return expert_system_rules(get_symptoms_from_text(input_text))

# Interactive demo: reads one line of symptoms from the user and prints the
# diagnosis. The recorded cell output shows that this guard passes when the
# cell is executed in the notebook.
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter your symptoms (e.g., 'I have fever and cough'): ")

    # Predict disease based on user input
    predicted_disease = predict_disease(user_input)
    print(f'Predicted Disease: {predicted_disease}')


Accuracy: 25.20%
              precision    recall  f1-score   support

    COVID-19       0.27      0.20      0.23       755
 Common Cold       0.25      0.18      0.21       739
         Flu       0.25      0.53      0.34       724
     Healthy       0.24      0.11      0.16       782

    accuracy                           0.25      3000
   macro avg       0.25      0.26      0.23      3000
weighted avg       0.25      0.25      0.23      3000

Enter your symptoms (e.g., 'I have fever and cough'): Fever, Headache
Predicted Disease: Flu


`Adding recommendation system`

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import spacy

# Load the spaCy model
nlp = spacy.load('en_core_web_sm')

# Seed every random step (data generation, shuffle, split, tree) so that
# Restart & Run All reproduces the printed metrics.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the number of rows
n_rows = 10000

# Define the columns (four binary symptom flags followed by the label)
columns = ["Fever", "Cough", "Headache", "Fatigue", "Disease"]
feature_columns = columns[:-1]

# Ensure balanced classes: every disease receives the same number of rows
diseases = ["COVID-19", "Flu", "Common Cold", "Healthy"]
rows_per_disease = n_rows // len(diseases)

# NOTE: each flag is a fair coin flip drawn independently of the disease label,
# so the features carry no signal and the accuracy printed below can only be
# about chance (25% with four balanced classes).
symptom_flags = rng.integers(
    0, 2, size=(rows_per_disease * len(diseases), len(feature_columns))
)

# Create the DataFrame
df = pd.DataFrame(symptom_flags, columns=feature_columns)
df["Disease"] = np.repeat(diseases, rows_per_disease)

# Shuffle the data
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Save to CSV
df.to_csv("medical.csv", index=False)

# Load the dataset
df = pd.read_csv('medical.csv')

# Split the dataset into features and target variable
X = df.drop('Disease', axis=1)  # Features
y = df['Disease']  # Target variable

# Convert boolean features to integers
X = X.astype(int)

# Split first, then fit the scaler on the training rows only, so that no
# statistics of the test rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so that code expecting the fully scaled matrix still finds it.
X_scaled = scaler.transform(X)

# Initialize the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=SEED)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Generate a classification report
report = classification_report(y_test, y_pred)
print(report)

# Define recommendations for each disease (keys match the labels in `diseases`)
recommendations = {
    "COVID-19": "Please contact your healthcare provider immediately. Follow CDC guidelines for isolation and testing.",
    "Flu": "Rest and hydrate. Over-the-counter medications may help with symptoms. Consult your doctor if symptoms worsen.",
    "Common Cold": "Stay hydrated and get plenty of rest. Over-the-counter medications can alleviate symptoms. Consult your doctor if needed.",
    "Healthy": "You seem to be in good health. Continue to maintain a healthy lifestyle."
}

def expert_system_rules(symptoms):
    """Diagnose from a symptom dict, preferring hand-written rules over the model.

    Args:
        symptoms: mapping with the keys 'Fever', 'Cough', 'Headache' and
            'Fatigue'; a value of 1 marks the symptom as present.

    Returns:
        The disease of the first matching rule; when no rule matches, the
        prediction of the module-level ``clf`` on the ``scaler``-transformed
        symptoms.
    """
    # Ordered (required symptoms, diagnosis) pairs: the first rule whose
    # symptoms are all present wins.
    rule_table = (
        (('Fever', 'Cough'), 'COVID-19'),
        (('Cough', 'Fatigue'), 'Flu'),
        (('Cough', 'Headache'), 'Common Cold'),
    )
    for required, diagnosis in rule_table:
        if all(symptoms[name] == 1 for name in required):
            return diagnosis

    # No rule fired: defer to the trained classifier.
    single_row = pd.DataFrame([symptoms])
    return clf.predict(scaler.transform(single_row))[0]

def get_symptoms_from_text(input_text):
    """Flag each known symptom mentioned in the text, using spaCy tokens.

    A symptom counts as mentioned when a token's text or its lemma equals the
    lower-cased symptom name. Checking the lemma lets inflected forms such as
    "coughing" or "headaches" match; comparing the token text alone missed
    them. Negation ("no fever") is not detected.

    Args:
        input_text: free-text description typed by the user.

    Returns:
        Dict with the keys 'Fever', 'Cough', 'Headache' and 'Fatigue', each
        mapped to 1 (mentioned) or 0 (not mentioned).
    """
    # Initialize symptoms dictionary
    symptoms = {'Fever': 0, 'Cough': 0, 'Headache': 0, 'Fatigue': 0}
    # Lower-cased surface form -> dictionary key, e.g. 'fever' -> 'Fever'.
    known_forms = {name.lower(): name for name in symptoms}

    # Process the text with spaCy
    for token in nlp(input_text.lower()):
        # The text is checked as well, so matching still works when the
        # pipeline provides no lemma for a token.
        for form in (token.text, token.lemma_):
            if form in known_forms:
                symptoms[known_forms[form]] = 1

    return symptoms

def predict_disease(input_text):
    """Diagnose a free-text symptom description and attach advice.

    Returns:
        ``(predicted_disease, recommendation)``. The recommendation is looked
        up in the module-level ``recommendations`` dict and falls back to
        "No recommendation available." for an unknown disease.
    """
    predicted_disease = expert_system_rules(get_symptoms_from_text(input_text))
    advice = recommendations.get(predicted_disease, "No recommendation available.")
    return predicted_disease, advice

# Interactive demo: reads one line of symptoms from the user and prints the
# diagnosis together with its recommendation. The recorded cell output shows
# that this guard passes when the cell is executed in the notebook.
if __name__ == "__main__":
    # Example user input
    user_input = input("Enter your symptoms (e.g., 'I have fever and cough'): ")

    # Predict disease and get recommendation based on user input
    predicted_disease, recommendation = predict_disease(user_input)

    # Print results
    print(f'Predicted Disease: {predicted_disease}')
    print(f'Recommendation: {recommendation}')



Accuracy: 24.73%
              precision    recall  f1-score   support

    COVID-19       0.25      0.18      0.21       774
 Common Cold       0.24      0.38      0.29       710
         Flu       0.28      0.19      0.23       772
     Healthy       0.24      0.25      0.24       744

    accuracy                           0.25      3000
   macro avg       0.25      0.25      0.24      3000
weighted avg       0.25      0.25      0.24      3000

Enter your symptoms (e.g., 'I have fever and cough'): Fever, Headche
Predicted Disease: Common Cold
Recommendation: Stay hydrated and get plenty of rest. Over-the-counter medications can alleviate symptoms. Consult your doctor if needed.


`load model in pickle`

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import spacy
import pickle

# Load the spaCy model
nlp = spacy.load('en_core_web_sm')

# Seed every random step (data generation, shuffle, split, tree) so that
# Restart & Run All reproduces the printed metrics and the pickled model.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the number of rows
n_rows = 10000

# Define the columns (four binary symptom flags followed by the label)
columns = ["Fever", "Cough", "Headache", "Fatigue", "Disease"]
feature_columns = columns[:-1]

# Ensure balanced classes: every disease receives the same number of rows
diseases = ["COVID-19", "Flu", "Common Cold", "Healthy"]
rows_per_disease = n_rows // len(diseases)

# NOTE: each flag is a fair coin flip drawn independently of the disease label,
# so the features carry no signal and the accuracy printed below can only be
# about chance (25% with four balanced classes).
symptom_flags = rng.integers(
    0, 2, size=(rows_per_disease * len(diseases), len(feature_columns))
)

# Create the DataFrame
df = pd.DataFrame(symptom_flags, columns=feature_columns)
df["Disease"] = np.repeat(diseases, rows_per_disease)

# Shuffle the data
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Save to CSV
df.to_csv("medical.csv", index=False)

# Load the dataset
df = pd.read_csv('medical.csv')

# Split the dataset into features and target variable
X = df.drop('Disease', axis=1)  # Features
y = df['Disease']  # Target variable

# Convert boolean features to integers
X = X.astype(int)

# Split first, then fit the scaler on the training rows only, so that no
# statistics of the test rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so that code expecting the fully scaled matrix still finds it.
X_scaled = scaler.transform(X)

# Initialize the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=SEED)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Generate a classification report
report = classification_report(y_test, y_pred)
print(report)

# Define recommendations for each disease (keys match the labels in `diseases`)
recommendations = {
    "COVID-19": "Please contact your healthcare provider immediately. Follow CDC guidelines for isolation and testing.",
    "Flu": "Rest and hydrate. Over-the-counter medications may help with symptoms. Consult your doctor if symptoms worsen.",
    "Common Cold": "Stay hydrated and get plenty of rest. Over-the-counter medications can alleviate symptoms. Consult your doctor if needed.",
    "Healthy": "You seem to be in good health. Continue to maintain a healthy lifestyle."
}

# Save the trained model and scaler to a pickle file. They are stored together
# because prediction needs the scaler that was fitted alongside the model.
with open('model_and_scaler.pkl', 'wb') as f:
    pickle.dump({
        'model': clf,
        'scaler': scaler
    }, f)

# Save the recommendations dictionary to a pickle file
with open('recommendations.pkl', 'wb') as f:
    pickle.dump(recommendations, f)

def expert_system_rules(symptoms):
    """Diagnose from a symptom dict, preferring hand-written rules over the model.

    Args:
        symptoms: mapping with the keys 'Fever', 'Cough', 'Headache' and
            'Fatigue'; a value of 1 marks the symptom as present.

    Returns:
        The disease of the first matching rule; when no rule matches, the
        prediction of the module-level ``clf`` on the ``scaler``-transformed
        symptoms.
    """
    # Ordered (required symptoms, diagnosis) pairs: the first rule whose
    # symptoms are all present wins.
    rule_table = (
        (('Fever', 'Cough'), 'COVID-19'),
        (('Cough', 'Fatigue'), 'Flu'),
        (('Cough', 'Headache'), 'Common Cold'),
    )
    for required, diagnosis in rule_table:
        if all(symptoms[name] == 1 for name in required):
            return diagnosis

    # No rule fired: defer to the trained classifier.
    single_row = pd.DataFrame([symptoms])
    return clf.predict(scaler.transform(single_row))[0]

def get_symptoms_from_text(input_text):
    """Flag each known symptom mentioned in the text, using spaCy tokens.

    A symptom counts as mentioned when a token's text or its lemma equals the
    lower-cased symptom name. Checking the lemma lets inflected forms such as
    "coughing" or "headaches" match; comparing the token text alone missed
    them. Negation ("no fever") is not detected.

    Args:
        input_text: free-text description typed by the user.

    Returns:
        Dict with the keys 'Fever', 'Cough', 'Headache' and 'Fatigue', each
        mapped to 1 (mentioned) or 0 (not mentioned).
    """
    # Initialize symptoms dictionary
    symptoms = {'Fever': 0, 'Cough': 0, 'Headache': 0, 'Fatigue': 0}
    # Lower-cased surface form -> dictionary key, e.g. 'fever' -> 'Fever'.
    known_forms = {name.lower(): name for name in symptoms}

    # Process the text with spaCy
    for token in nlp(input_text.lower()):
        # The text is checked as well, so matching still works when the
        # pipeline provides no lemma for a token.
        for form in (token.text, token.lemma_):
            if form in known_forms:
                symptoms[known_forms[form]] = 1

    return symptoms

def predict_disease(input_text):
    """Diagnose a free-text symptom description and attach advice.

    Returns:
        ``(predicted_disease, recommendation)``. The recommendation is looked
        up in the module-level ``recommendations`` dict and falls back to
        "No recommendation available." for an unknown disease.
    """
    predicted_disease = expert_system_rules(get_symptoms_from_text(input_text))
    advice = recommendations.get(predicted_disease, "No recommendation available.")
    return predicted_disease, advice

# Interactive demo that round-trips the artefacts through pickle: it reloads
# the files written earlier in this cell and rebinds the module-level `clf`,
# `scaler` and `recommendations` that the prediction functions read.
if __name__ == "__main__":
    # Load the model, scaler, and recommendations from pickle files
    # NOTE: this rebinds `data`, which held the generated rows earlier in the cell.
    # Only unpickle files you created yourself: pickle.load can run arbitrary code.
    with open('model_and_scaler.pkl', 'rb') as f:
        data = pickle.load(f)
        clf = data['model']
        scaler = data['scaler']

    with open('recommendations.pkl', 'rb') as f:
        recommendations = pickle.load(f)

    # Example user input
    user_input = input("Enter your symptoms (e.g., 'I have fever and cough'): ")

    # Predict disease and get recommendation based on user input
    predicted_disease, recommendation = predict_disease(user_input)

    # Print results
    print(f'Predicted Disease: {predicted_disease}')
    print(f'Recommendation: {recommendation}')


Accuracy: 25.37%
              precision    recall  f1-score   support

    COVID-19       0.26      0.13      0.18       769
 Common Cold       0.26      0.44      0.33       756
         Flu       0.24      0.18      0.21       755
     Healthy       0.25      0.27      0.26       720

    accuracy                           0.25      3000
   macro avg       0.25      0.25      0.24      3000
weighted avg       0.25      0.25      0.24      3000

Enter your symptoms (e.g., 'I have fever and cough'): cough
Predicted Disease: Healthy
Recommendation: You seem to be in good health. Continue to maintain a healthy lifestyle.


# `Rain Prediction from scratch`

`import libraries`

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import pickle


`data exploration`

In [None]:
# Load the weather dataset used by the rain-prediction section.
df = pd.read_csv("https://raw.githubusercontent.com/AqueeqAzam/data-science-and-machine-learning-datasets/main/environment.csv")

# A cell renders only its last expression, so the preview and the null counts
# are printed explicitly; as bare expressions their results were discarded.
print(df.head(5))
print(df.isnull().sum())
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   temperature           10000 non-null  float64
 1   humidity              10000 non-null  float64
 2   wind_speed            10000 non-null  float64
 3   precipitation         10000 non-null  float64
 4   atmospheric_pressure  10000 non-null  float64
 5   rain                  10000 non-null  int64  
dtypes: float64(5), int64(1)
memory usage: 468.9 KB


`data processing`

In [None]:
# Separate the predictors from the `rain` target column.
x = df.drop('rain', axis=1)
y = df['rain']

# split the data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# initialize the scaler (fitted on the training split only, then reused on test)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# model training
# Fit on the *scaled* training features. The model is evaluated on
# x_test_scaled here and receives scaler-transformed input in the prediction
# cells, so fitting on the raw x_train put training and inference on
# different feature scales.
rfc = RandomForestClassifier(n_estimators=100, random_state=42)
rfc.fit(x_train_scaled, y_train)

# model prediction
y_pred = rfc.predict(x_test_scaled)
print(y_pred)

# model evaluation
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')



[0 0 0 ... 0 0 0]
Accuracy:, 69.45%




`save model`

In [None]:
import pickle
# Persist the classifier together with its scaler: prediction must apply the
# same scaling that was fitted during training.
bundle = {'model': rfc, 'scaler': scaler}
with open('rain_predict.pkl', 'wb') as file:
  pickle.dump(bundle, file)
  print('File created')

File created


`load model`

In [None]:
import pickle
# Read back the bundle written by the save cell and unpack its two entries.
# Only unpickle files you created yourself: pickle.load can run arbitrary code.
with open('rain_predict.pkl', 'rb') as file:
  saved = pickle.load(file)
model, scaler = saved['model'], saved['scaler']


`adding user input`

In [None]:
import numpy as np
import pickle

# Read back the bundle written by the save cell and unpack its two entries.
# Only unpickle files you created yourself: pickle.load can run arbitrary code.
with open('rain_predict.pkl', 'rb') as file:
  saved = pickle.load(file)
model, scaler = saved['model'], saved['scaler']

# get user input
def get_input():
  """Prompt for the five weather readings and return them as one row.

  Returns:
      A NumPy array of shape (1, 5) ordered temperature, humidity, wind speed,
      precipitation, atmospheric pressure.

  Raises:
      ValueError: if an answer cannot be converted by ``float``.
  """
  print('Please enter weather condition:')
  # Prompts are asked in the order of the feature columns used for training.
  prompts = ('Temperature:', 'Hunidity:', 'wind speed:', 'Precipation:', 'ATP:')
  readings = [float(input(label)) for label in prompts]
  return np.array([readings])

def predict_rain(user_input):
  """Scale one row of readings and translate the model's class into a label.

  Args:
      user_input: 2-D array-like holding a single row of weather readings.

  Returns:
      "rain" when the module-level ``model`` predicts class 1 for the
      ``scaler``-transformed row, otherwise 'no rain'.
  """
  predicted_class = model.predict(scaler.transform(user_input))[0]
  return "rain" if predicted_class == 1 else 'no rain'

# Get user input and make a prediction
try:
    # Use `get_input`, the reader defined in this cell. `get_user_input` is
    # only defined in a later cell, so calling it here raises NameError on a
    # fresh Restart & Run All.
    user_input = get_input()
    result = predict_rain(user_input)
    print(f"Prediction: {result}")
except ValueError as e:
    # float() raises ValueError for a non-numeric answer; show the message
    # instead of a traceback.
    print(e)


Please enter weather condition:
Temperature:4
Hunidity:8
wind speed:34
Precipation:5.7
ATP:700
Predict:no rain




`adding expert system`

In [None]:
import pickle
import numpy as np

# Read back the bundle written by the save cell and unpack its two entries.
# Only unpickle files you created yourself: pickle.load can run arbitrary code.
with open('rain_predict.pkl', 'rb') as file:
  saved = pickle.load(file)
model, scaler = saved['model'], saved['scaler']

# expert system code
def expert_system(ml_prediction, user_input):
  """Apply hand-written weather rules, falling back to the ML prediction.

  Rules are checked in order and the first match decides the result.

  Args:
      ml_prediction: the model's output for the row; values above 0.5 are
          read as rain.
      user_input: 2-D array-like whose first row is (temperature, humidity,
          wind_speed, precipitation, atmospheric pressure), unscaled.

  Returns:
      "Rain" or "No Rain". Every branch uses the same capitalisation, so
      callers can compare the result reliably (rule 1 used to return
      'no rain').
  """
  # user_input[0] is the first (and only) row of the input array.
  temperature, humidity, wind_speed, precipitation, atp = user_input[0]

  # Rule 1: warm and dry air overrides the ML prediction to "No Rain"
  if temperature > 25 and humidity < 60:
    return "No Rain"

  # Rule 2: strong wind with little precipitation overrides to "No Rain"
  if wind_speed > 20 and precipitation < 5:
    return "No Rain"

  # Rule 3: If atmospheric pressure is low, override ML prediction to "Rain"
  if atp < 1000:
    return "Rain"

  # Rule 4: If precipitation is high and humidity is high, override ML prediction to "Rain"
  if precipitation > 10 and humidity > 80:
    return "Rain"

  # Rule 5: If temperature is low and wind speed is low, override ML prediction to "No Rain"
  if temperature < 15 and wind_speed < 10:
    return "No Rain"

  # Default to ML prediction
  return "Rain" if ml_prediction > 0.5 else "No Rain"

# Get user input
def get_user_input():
    """Prompt for the five weather readings and return them as one row.

    Returns:
        A NumPy array of shape (1, 5) ordered temperature, humidity, wind
        speed, precipitation, atmospheric pressure.

    Raises:
        ValueError: if an answer cannot be converted by ``float``.
    """
    print("Please enter the following weather conditions:")
    # Asked in the order in which ``expert_system`` unpacks the row.
    prompts = (
        "Temperature (°C): ",
        "Humidity (%): ",
        "Wind Speed (km/h): ",
        "Precipitation (mm): ",
        "Atmospheric Pressure (hPa): ",
    )
    return np.array([[float(input(prompt)) for prompt in prompts]])

def predict_rain(user_input):
  """Combine the ML model with the expert rules for one row of readings.

  The row is scaled with the module-level ``scaler`` for the model, while
  ``expert_system`` receives the model's prediction together with the
  original, unscaled row.
  """
  model_vote = model.predict(scaler.transform(user_input))[0]
  return expert_system(model_vote, user_input)


# Get user input and make a prediction
# Read the five readings, run the combined ML + rule prediction and print it.
try:
    user_input = get_user_input()
    result = predict_rain(user_input)
    print(f"Prediction: {result}")
except ValueError as e:
    # get_user_input converts each answer with float(), which raises
    # ValueError for non-numeric text; the message is printed instead of a traceback.
    print(e)

Please enter the following weather conditions:
Temperature (°C): 23
Humidity (%): 34
Wind Speed (km/h): 55
Precipitation (mm): 8.0
Atmospheric Pressure (hPa): 10
Prediction: Rain




`adding nlp`

In [None]:
import spacy

# Load spaCy's small English pipeline; `extract_number` below uses it to
# tokenize the user's text.
nlp = spacy.load("en_core_web_sm")

def extract_number(text):
    """Return the first number in ``text`` that can be parsed as a float.

    Tokens are taken from the module-level spaCy pipeline ``nlp``. A token
    that spaCy flags as number-like but that ``float`` cannot parse (for
    example a number word such as "ten") is skipped rather than aborting the
    search; thousands separators ("1,000") are removed before parsing.

    Args:
        text: free text such as "34 degrees".

    Returns:
        The first parseable number as a float.

    Raises:
        ValueError: if no token yields a number.
    """
    # NOTE(review): whether a leading minus sign stays attached to the digits
    # depends on spaCy's tokenization; verify with negative temperatures.
    for token in nlp(text):
        if not token.like_num:
            continue
        try:
            return float(token.text.replace(",", ""))
        except ValueError:
            # Number-like, but not a numeric literal: try the next token.
            continue
    raise ValueError("No numerical value found in the input.")

# Example usage: read one line from the user and show the number found in it.
user_input = input('Enter temperature:')
number = extract_number(user_input)
print(number)  # e.g. 34.0 for the input "34 degeree" (see the recorded output)


Enter temperature:34 degeree
34.0


In [None]:
import pickle
import numpy as np
import spacy

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load the model and scaler for prediction.
# The save cell writes 'rain_predict.pkl'; no cell in this notebook creates
# 'rain_prediction_model.pkl', so opening that name fails with
# FileNotFoundError.
# Only unpickle files you created yourself: pickle.load can run arbitrary code.
with open('rain_predict.pkl', 'rb') as file:
    saved_objects = pickle.load(file)
    model = saved_objects['model']
    scaler = saved_objects['scaler']

def expert_system_rules(ml_prediction, user_input):
    """Apply hand-written weather rules, falling back to the ML prediction.

    Args:
        ml_prediction: the model's output for the row; values above 0.5 are
            read as rain.
        user_input: 2-D array-like whose first row is (temperature, humidity,
            wind_speed, precipitation, atmospheric_pressure), unscaled.

    Returns:
        "Rain" or "No Rain"; the first matching rule group decides.
    """
    temperature, humidity, wind_speed, precipitation, atmospheric_pressure = user_input[0]

    # Dry-weather overrides: warm with low humidity, or windy with little precipitation.
    warm_and_dry = temperature > 25 and humidity < 60
    if warm_and_dry or (wind_speed > 20 and precipitation < 5):
        return "No Rain"

    # Wet-weather overrides: low pressure, or heavy precipitation in humid air.
    if atmospheric_pressure < 1000 or (precipitation > 10 and humidity > 80):
        return "Rain"

    # Cold and calm overrides to "No Rain".
    if temperature < 15 and wind_speed < 10:
        return "No Rain"

    # No rule matched: trust the model.
    if ml_prediction > 0.5:
        return "Rain"
    return "No Rain"

def extract_numbers(text):
    """Return every number in ``text`` that can be parsed as a float, in order.

    Tokens are taken from the module-level spaCy pipeline ``nlp``. A token
    that spaCy flags as number-like but that ``float`` cannot parse (for
    example a number word such as "ten") is skipped instead of aborting the
    whole extraction; thousands separators ("1,000") are removed before
    parsing.

    Raises:
        ValueError: if no token yields a number.
    """
    numbers = []
    for token in nlp(text):
        if not token.like_num:
            continue
        try:
            numbers.append(float(token.text.replace(",", "")))
        except ValueError:
            # Number-like, but not a numeric literal: ignore it.
            continue
    if not numbers:
        raise ValueError("No numerical values found in the input.")
    return numbers

def get_user_input():
    """Prompt for the five weather readings; each answer may be a sentence.

    The first number found in each answer is used. Parsing goes through
    ``extract_numbers``, which is defined in this cell, so the cell no longer
    depends on ``extract_number`` from an earlier cell having been run.

    Returns:
        A NumPy array of shape (1, 5) ordered temperature, humidity, wind
        speed, precipitation, atmospheric pressure.

    Raises:
        ValueError: if an answer contains no number.
    """
    print("Please enter the following weather conditions (you can type in sentences):")
    prompts = (
        "Temperature (°C): ",
        "Humidity (%): ",
        "Wind Speed (km/h): ",
        "Precipitation (mm): ",
        "Atmospheric Pressure (hPa): ",
    )
    # Index 0 is safe: extract_numbers raises instead of returning an empty list.
    readings = [extract_numbers(input(prompt))[0] for prompt in prompts]

    return np.array([readings])

def predict_rain(user_input):
    """Combine the ML model with the expert rules for one row of readings.

    The row is scaled with the module-level ``scaler`` for the model, while
    ``expert_system_rules`` receives the model's prediction together with the
    original, unscaled row.
    """
    scaled_row = scaler.transform(user_input)
    model_vote = model.predict(scaled_row)[0]
    final_label = expert_system_rules(model_vote, user_input)
    return final_label

# Get user input and make a prediction
# Read the five readings (free text allowed), run the combined ML + rule
# prediction and print it.
try:
    user_input = get_user_input()
    result = predict_rain(user_input)
    print(f"Prediction: {result}")
except ValueError as e:
    # Raised when an answer contains no usable number; the message is printed
    # instead of a traceback.
    print(e)