In [None]:
import pandas as pd
import re
import string
import tensorflow as tf
import keras
from keras.optimizers import *
from keras import regularizers
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard, EarlyStopping
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report


In [None]:
def load_model(model_path):
    """Load a previously saved Keras model.

    Args:
        model_path: Location of the saved model, passed straight through to
            ``keras.models.load_model``.

    Returns:
        The object returned by ``keras.models.load_model`` for ``model_path``.
    """
    restored_model = keras.models.load_model(model_path)
    return restored_model

In [None]:
def preprocess_data(dataFrame, tokenizer=None, max_length=None):
    """Clean the tweets in ``dataFrame`` and encode them as padded integer sequences.

    Rows with a missing ``keyword`` are dropped. Every remaining tweet is then
    cleaned in this order: URLs are replaced by a space, ``@mention`` tokens and
    ``#`` characters are removed, the text is lower-cased, ASCII punctuation is
    stripped and non-ASCII characters are removed. The cleaned text is
    tokenized and post-padded to a common length.

    Args:
        dataFrame: DataFrame containing ``keyword``, ``tweet`` and ``disaster``
            columns. The caller's frame is left untouched.
        tokenizer: Optional, already fitted tokenizer exposing
            ``texts_to_sequences``. When omitted, a new Keras ``Tokenizer`` is
            fitted on the cleaned tweets of ``dataFrame`` itself; the integer
            ids are then specific to this data.
            NOTE(review): a saved model presumably expects the ids of the
            tokenizer used at training time - pass that tokenizer here unless
            the input text is the training text. Confirm against the training
            notebook.
        max_length: Optional padded sequence length. When omitted, the length
            of the longest tokenized tweet in ``dataFrame`` is used.

    Returns:
        Tuple ``(X_padded, y)``: the array produced by ``pad_sequences`` and
        the ``disaster`` column of the retained rows cast to ``int``.

    Raises:
        ValueError: If no rows remain after dropping missing keywords and
            ``max_length`` was not supplied.
    """
    # `\S+` makes each match stop at the first whitespace character. A class
    # such as `[^s]` would instead stop at the letter "s" and swallow the text
    # that follows the URL. The dot after "www" is escaped so it is literal.
    # NOTE(review): if the saved model was trained on text cleaned with a
    # different URL pattern, keep the two pipelines in sync.
    url_pattern = re.compile(r'(www\.\S+)|(https?://\S+)')
    mention_pattern = re.compile(r'@\S+')
    non_ascii_pattern = re.compile(r'[^\x00-\x7F]+')
    punctuation_table = str.maketrans('', '', string.punctuation)

    def clean_tweet(text):
        # The order matters: URLs and mentions are removed before punctuation
        # stripping would destroy the "://" and "@" markers they rely on.
        text = url_pattern.sub(' ', text)
        text = mention_pattern.sub('', text)
        text = text.replace('#', '')
        text = text.lower()
        text = text.translate(punctuation_table)
        return non_ascii_pattern.sub('', text)

    # Work on an explicit copy so the column assignment below neither touches
    # the caller's frame nor triggers pandas' SettingWithCopyWarning.
    df = (
        dataFrame[['keyword', 'tweet', 'disaster']]
        .dropna(subset=['keyword'])
        .copy()
    )
    df['tweet'] = df['tweet'].apply(clean_tweet)

    X = df['tweet']
    y = df['disaster'].astype(int)

    # Fit a fresh vocabulary only when the caller did not provide one.
    if tokenizer is None:
        tokenizer = tf.keras.preprocessing.text.Tokenizer()
        tokenizer.fit_on_texts(X)

    # Convert text to sequences of integers
    X_sequences = tokenizer.texts_to_sequences(X)

    if max_length is None:
        if not X_sequences:
            raise ValueError(
                "No rows left to preprocess after dropping missing keywords"
            )
        max_length = max(map(len, X_sequences))

    X_padded = tf.keras.preprocessing.sequence.pad_sequences(
        X_sequences, maxlen=max_length, padding='post'
    )

    return X_padded, y

In [None]:
def preprocess_and_predict(model_path, testing_data):
    """Preprocess ``testing_data`` and run the saved model on the result.

    Args:
        model_path: Path of the saved model, forwarded to ``load_model``.
        testing_data: DataFrame forwarded to ``preprocess_data``.

    Returns:
        The output of the loaded model's ``predict`` on the padded sequences.
    """
    # Only the padded token sequences are needed for inference; the labels
    # returned alongside them are discarded.
    padded_sequences, _labels = preprocess_data(testing_data)

    # Restore the model and score the encoded tweets in one go.
    model = load_model(model_path)
    return model.predict(padded_sequences)

In [None]:
def evaluate_model(model_path, testing_data):
    """Score the saved model on ``testing_data`` and print the metrics.

    The data is preprocessed once, the model at ``model_path`` is loaded and
    run on the resulting features, and the scores are thresholded at 0.5 to
    obtain class labels. A classification report, the mean squared error of
    the raw scores and the accuracy of the thresholded labels are printed.

    Args:
        model_path: Path of the saved model, forwarded to ``load_model``.
        testing_data: DataFrame forwarded to ``preprocess_data``; it must
            carry the target column because the labels are needed here.

    Returns:
        None. All results are written to standard output.
    """
    # Preprocess a single time and reuse the features for prediction, so the
    # cleaning and tokenizer fitting are not repeated for the same frame.
    X_test, y_test = preprocess_data(testing_data)

    loaded_model = load_model(model_path)

    # Flatten so scores line up with the 1-D label vector.
    # Assumes the model emits one score per row - TODO confirm; a wider output
    # makes the metric calls below fail on inconsistent lengths.
    predictions = np.asarray(loaded_model.predict(X_test)).ravel()

    # Calculate the error between true values and predicted scores
    error = mean_squared_error(y_test, predictions)

    # Scores strictly above 0.5 are labelled 1, everything else 0.
    predicted_classes = np.where(predictions > 0.5, 1, 0)
    accuracy = accuracy_score(y_test, predicted_classes)

    print(classification_report(y_test, predicted_classes))
    print(f'Mean Squared Error: {error}')
    print(f'accuracy: {accuracy}')

In [None]:
if __name__ == "__main__":
    # Evaluate the saved Keras model on a CSV of labelled tweets: the file is
    # read into a DataFrame and handed to evaluate_model, which prints the metrics.
    # NOTE(review): both paths are hardcoded absolute locations under /content;
    # adjust them when running in an environment where those files live elsewhere.

    data = pd.read_csv('/content/test.csv')
    model_path = '/content/Weightsnew.keras'
    evaluate_model(model_path, data)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(subset=["keyword"], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tweet'] = df['tweet'].apply(lambda x: cleaning_URLs(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tweet'] = df['tweet'].apply(lambda x: remove_username(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

              precision    recall  f1-score   support

           0       0.91      0.96      0.93      4323
           1       0.94      0.87      0.90      3229

    accuracy                           0.92      7552
   macro avg       0.92      0.91      0.91      7552
weighted avg       0.92      0.92      0.92      7552

Mean Squared Error: 0.09771656256252172
accuracy: 0.9175052966101694
