In [27]:
!pip install keras



In [28]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras_tuner import RandomSearch
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from keras.layers import MaxPooling1D


In [None]:
# Load the datasets
train_df = pd.read_csv('../Resources/train_2.csv', encoding='ISO-8859-1')
test_df = pd.read_csv('../Resources/test_2.csv', encoding='ISO-8859-1')


In [None]:
train_df.info()

In [None]:
test_df.info()

In [30]:
# Assuming 'text' is the feature column and 'sentiment' is the target
X_train, y_train = train_df['text'].fillna(''), train_df['sentiment']
X_test, y_test = test_df['text'].fillna(''), test_df['sentiment']

# Instantiate the encoder
label_encoder = LabelEncoder()

# Fit the encoder and transform the target columns to numerical labels
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Preprocess and Vectorize text data for the logistic regression model
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train).toarray()
X_test_tfidf = tfidf_vectorizer.transform(X_test).toarray()

# Scale the TF-IDF features
scaler = StandardScaler(with_mean=False)  # Set with_mean=False for sparse data compatibility
X_train_tfidf_scaled = scaler.fit_transform(X_train_tfidf)
X_test_tfidf_scaled = scaler.transform(X_test_tfidf)

# Train a Logistic Regression model with the scaled data
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_tfidf_scaled, y_train_encoded) 
lr_predictions = lr_model.predict(X_test_tfidf_scaled)
print("Logistic Regression Accuracy:", accuracy_score(y_test_encoded, lr_predictions))

# Prepare data for CNN
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
maxlen = 100
X_train_pad = pad_sequences(X_train_seq, maxlen=maxlen)
X_test_pad = pad_sequences(X_test_seq, maxlen=maxlen)

# Define a model-building function for the tuner
def build_model(hp):
    model = Sequential([
    Embedding(input_dim=5000, output_dim=50),
    Conv1D(
        filters=hp.Int('filters', min_value=32, max_value=128, step=32),
        kernel_size=hp.Choice('kernel_size', values=[3, 5, 7]),
        activation='relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(
        filters=64,
        kernel_size=3,
        activation='relu'
    ),
    MaxPooling1D(pool_size=2),
    Conv1D(
        filters=128,
        kernel_size=3,
        activation='relu'
    ),
    GlobalMaxPooling1D(),
    Dense(units=hp.Int('dense_units', min_value=10, max_value=100, step=10), activation='relu'),
    Dense(1, activation='sigmoid')
])

    
    model.compile(optimizer=hp.Choice('optimizer', ['adam']),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Initialize the tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=1,
    directory='my_dir',
    project_name='sentiment_analysis'
)

# Execute the search with encoded labels
tuner.search(X_train_pad, y_train_encoded, epochs=5, validation_split=0.1)

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate the best model with encoded labels
_, accuracy = best_model.evaluate(X_test_pad, y_test_encoded)
print("CNN with Tuner Accuracy:", accuracy)

Logistic Regression Accuracy: 0.6590265987549518
Reloading Tuner from my_dir\sentiment_analysis\tuner0.json


  trackable.load_own_variables(weights_store.get(inner_path))


ValueError: A total of 3 objects could not be loaded. Example error message for object <Conv1D name=conv1d_1, built=True>:

Layer 'conv1d_1' expected 2 variables, but received 0 variables during loading. Expected: ['kernel', 'bias']

List of objects that could not be loaded:
[<Conv1D name=conv1d_1, built=True>, <Conv1D name=conv1d_2, built=True>, <Dense name=dense, built=True>]