# In [None]:
import numpy as np
import pandas as pd
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

# Read the Kiswahili dataset from the CSV file into a DataFrame.
data = pd.read_csv("kiswahili_data.csv")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Report the (rows, columns) shape of each partition.
for split_name, split_frame in (("Training", train_data), ("Testing", test_data)):
    print(f"{split_name} set shape:", split_frame.shape)


# Bag-of-words feature extractor.
# NOTE(review): the built-in 'english' stop-word list is applied here although
# the corpus appears to be Swahili — confirm this is intended.
vectorizer = CountVectorizer(stop_words='english')

# Pick out the raw text of each split.
train_text = train_data['text']
test_text = test_data['text']

# Learn the vocabulary from the training text only, then encode the test text
# against that same vocabulary.
train_vectors = vectorizer.fit_transform(train_text)
test_vectors = vectorizer.transform(test_text)

# Fit a multinomial Naïve Bayes classifier on the training count vectors.
nb_model = MultinomialNB().fit(train_vectors, train_data['sentiment'])

# Predict labels for the held-out rows.
nb_predictions = nb_model.predict(test_vectors)

# Score the predictions; precision and recall use the 'weighted' average
# across classes.
nb_accuracy = accuracy_score(test_data['sentiment'], nb_predictions)
nb_precision = precision_score(test_data['sentiment'], nb_predictions, average='weighted')
nb_recall = recall_score(test_data['sentiment'], nb_predictions, average='weighted')
nb_confusion_matrix = confusion_matrix(test_data['sentiment'], nb_predictions)

print("Naïve Bayes accuracy:", nb_accuracy)
print("Naïve Bayes precision:", nb_precision)
print("Naïve Bayes recall:", nb_recall)
print("Naïve Bayes confusion matrix:\n", nb_confusion_matrix)

# Fit a linear Support Vector Machine on the training count vectors, with the
# solver's iteration cap set explicitly to 50000.
svm_model = LinearSVC(max_iter=50000).fit(train_vectors, train_data['sentiment'])

# Predict labels for the held-out rows.
svm_predictions = svm_model.predict(test_vectors)

# Score the predictions against the held-out labels; precision and recall use
# the 'weighted' average across classes.
expected_labels = test_data['sentiment']
svm_accuracy = accuracy_score(expected_labels, svm_predictions)
svm_precision = precision_score(expected_labels, svm_predictions, average='weighted')
svm_recall = recall_score(expected_labels, svm_predictions, average='weighted')
svm_confusion_matrix = confusion_matrix(expected_labels, svm_predictions)

# Print the scalar metrics first, then the confusion matrix on its own lines.
for metric_label, metric_value in (
    ("accuracy", svm_accuracy),
    ("precision", svm_precision),
    ("recall", svm_recall),
):
    print(f"Support Vector Machine {metric_label}:", metric_value)
print("Support Vector Machine confusion matrix:\n", svm_confusion_matrix)


# Reuse the Support Vector Machine that was already fitted and evaluated
# earlier in this script (`svm_model`). Refitting a fresh LinearSVC() here
# would silently drop the max_iter=50000 setting and serve a model other than
# the one whose metrics were reported.

# Encode a sample tweet with the vocabulary learned from the training data.
tweet = "Naomba kujua kama kuna mtu anaweza kuniambia jinsi ya kufanya kazi ya kusafisha kwa kutumia dawa za asili"
new_tweet_vector = vectorizer.transform([tweet])

# Predict the sentiment of the sample tweet; predict() returns one label per
# input row, so the result has a single element.
new_tweet_sentiment = svm_model.predict(new_tweet_vector)

# Format with an f-string so the label prints whether it is stored as text or
# as a number ("+" concatenation raises TypeError for non-string labels).
print(f"The sentiment of this swahili text is : {new_tweet_sentiment[0]}")


def predict_sentiment(tweet):
    """Return the sentiment label the SVM predicts for one piece of text.

    The text is encoded with the module-level ``vectorizer`` and classified
    by the module-level ``svm_model``; the single predicted label is returned.
    """
    # transform() takes an iterable of documents, hence the one-item list.
    features = vectorizer.transform([tweet])

    # predict() yields one label per input row; unwrap the only one.
    return svm_model.predict(features)[0]

# Build the UI components from Gradio's top-level API. The gr.inputs and
# gr.outputs namespaces used previously are deprecated in Gradio 3.x and
# removed in 4.x, where accessing them raises AttributeError.
tweet_input = gr.Textbox(lines=3, label="Enter your swahili tweet")
prediction = gr.Label(label="The sentiment of this text is : ")

# Wire the classifier function into a web interface and start serving it.
gr.Interface(
    fn=predict_sentiment,
    inputs=tweet_input,
    outputs=prediction,
    title="Kiswahili Sentiment Analyzer",
    description="Enter a swahili text.",
).launch()
