In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import mode

# Path to your training dataset
data_path = r"C:/Users/Asus/Desktop/Rishabh/twitter/twitter_training.csv"

# Load the training dataset and show the first rows as a quick sanity check
data = pd.read_csv(data_path)
print("Training Data Loaded:")
print(data.head())

# Fail early with a clear message when the expected columns are absent
if 'Tweet' not in data.columns or 'Sentiment' not in data.columns:
    raise ValueError("The dataset must contain 'Tweet' and 'Sentiment' columns.")

# Drop rows that lack either the text or its label: a row without a label
# cannot be used for supervised training or for scoring the test split.
data.dropna(subset=['Tweet', 'Sentiment'], inplace=True)

# Split data into training and testing sets.
# The text is cast to str because the vectorizer expects string documents,
# and a numeric-only cell would otherwise be parsed as a number by read_csv.
X = data['Tweet'].astype(str)
y = data['Sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model pipeline (TF-IDF features -> multinomial naive Bayes)
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X_train, y_train)

# Evaluate the model on the held-out 20% test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Function to load and process the validation dataset
def load_validation_data(file_path):
    """Load a validation dataset from a CSV or Excel file.

    Args:
        file_path: Path to a ``.csv`` or ``.xlsx`` file. The extension is
            matched case-insensitively, so ``DATA.CSV`` is accepted too.

    Returns:
        pandas.DataFrame: The file's contents; it always has a 'Tweet' column.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``, or
            if the loaded data has no 'Tweet' column.
    """
    # Only the extension check uses the lower-cased copy; the path that is
    # actually opened stays exactly as the caller supplied it.
    lowered_path = str(file_path).lower()

    if lowered_path.endswith('.csv'):
        validation_data = pd.read_csv(file_path)
    elif lowered_path.endswith('.xlsx'):
        validation_data = pd.read_excel(file_path)
    else:
        raise ValueError("Please upload a CSV or Excel file.")

    # Ensure the file has the 'Tweet' column
    if 'Tweet' not in validation_data.columns:
        raise ValueError("The uploaded file must contain a 'Tweet' column.")

    return validation_data

# Main function to run the sentiment analysis
def main():
    """Prompt for a validation file, predict its sentiments and save them.

    Reads a file path from standard input, loads it with
    ``load_validation_data``, drops rows whose 'Tweet' is missing, predicts a
    sentiment for every remaining tweet with the module-level ``model``,
    prints the per-tweet predictions plus the most frequent predicted
    sentiment, and writes everything to ``predicted_sentiments.csv`` in the
    current working directory.

    Any exception raised while loading, predicting or saving is caught and
    reported on standard output instead of propagating.
    """
    # Get file path from user input. Stray whitespace and the surrounding
    # quotes that file managers add when copying a path are removed first.
    raw_path = input("Please enter the path to your validation CSV or Excel file: ")
    file_path = raw_path.strip().strip('"\'')

    try:
        # Load validation data
        validation_data = load_validation_data(file_path)

        # Handle missing values in validation data
        validation_data.dropna(subset=['Tweet'], inplace=True)  # Drop rows where 'Tweet' is NaN

        # With no rows left there is nothing to predict and no mode to
        # report, so say so explicitly rather than failing further down.
        if validation_data.empty:
            print("No tweets found in the validation file; nothing to predict.")
            return

        # Predict all tweets in one batched call instead of invoking the
        # pipeline once per row. The cast to str guards against cells that
        # were parsed as numbers.
        tweets = validation_data['Tweet'].astype(str)
        validation_data['Predicted_Sentiment'] = model.predict(tweets)

        # Display the prediction results
        print("Prediction Results:")
        print(validation_data[['Tweet', 'Predicted_Sentiment']])
        print("overall result : ", validation_data['Predicted_Sentiment'].mode()[0])

        # Save results to a CSV file
        output_file = "predicted_sentiments.csv"
        validation_data.to_csv(output_file, index=False)
        print(f"Results saved to {output_file}.")

    except Exception as e:
        # Top-level boundary of the script: report the failure to the user
        # instead of showing a traceback.
        print(f"An error occurred: {e}")

# Run the main function only when this code is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Training Data Loaded:
   Unnamed: 0    ID     Category Sentiment  \
0           0  2401  Borderlands  Positive   
1           1  2401  Borderlands  Positive   
2           2  2401  Borderlands  Positive   
3           3  2401  Borderlands  Positive   
4           4  2401  Borderlands  Positive   

                                               Tweet  
0  I am coming to the borders and I will kill you...  
1  im getting on borderlands and i will kill you ...  
2  im coming on borderlands and i will murder you...  
3  im getting on borderlands 2 and i will murder ...  
4  im getting into borderlands and i can murder y...  
Model Accuracy: 72.17%
Classification Report:
              precision    recall  f1-score   support

  Irrelevant       0.96      0.39      0.56      2624
    Negative       0.64      0.90      0.75      4463
     Neutral       0.85      0.61      0.71      3589
    Positive       0.70      0.83      0.76      4123

    accuracy                           0.72     14799