In [2]:
!ls

college_feedback.csv  sample_data


In [12]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Test the dataset is loaded**

In [None]:
import pandas as pd

df = pd.read_csv("college_feedback.csv")
df.head()


Unnamed: 0,feedback,sentiment
0,The teachers are very supportive,positive
1,Faculty explains concepts clearly,positive
2,Library facilities are excellent,positive
3,Classrooms are clean and spacious,positive
4,Campus environment is peaceful,positive


In [None]:
data = pd.read_csv("college_feedback.csv")
data.shape


(95, 2)

**Check Dataset Info**

In [None]:
df.info()
df['sentiment'].value_counts()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   feedback   95 non-null     object
 1   sentiment  95 non-null     object
dtypes: object(2)
memory usage: 1.6+ KB


Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
positive,49
negative,46


**Convert Text to Lowercase**

In [None]:
df["feedback"] = df["feedback"].str.lower()
df.head()

Unnamed: 0,feedback,sentiment
0,the teachers are very supportive,positive
1,faculty explains concepts clearly,positive
2,library facilities are excellent,positive
3,classrooms are clean and spacious,positive
4,campus environment is peaceful,positive


**Remove Punctuation & Special Characters**

In [None]:
import re

def clean_text(text):
  text = re.sub(r'[^a-zA-Z\s]', '', text)
  return text

df['feedback'] = df['feedback'].apply(clean_text)
df.head()

Unnamed: 0,feedback,sentiment
0,the teachers are very supportive,positive
1,faculty explains concepts clearly,positive
2,library facilities are excellent,positive
3,classrooms are clean and spacious,positive
4,campus environment is peaceful,positive


**Remove Stopwords (like is, the, are)**

In [None]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

def remove_stopwords(text):
  words = text.split()
  words = [w for w in words if w not in stop_words]
  return " ".join(words)

df['feedback'] = df['feedback'].apply(remove_stopwords)
df.head()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Unnamed: 0,feedback,sentiment
0,teachers supportive,positive
1,faculty explains concepts clearly,positive
2,library facilities excellent,positive
3,classrooms clean spacious,positive
4,campus environment peaceful,positive


**Encode Sentiment (Text → Numbers)**

In [None]:
df['sentiment'] = df['sentiment'].map({
    'positive':1,
    'negative':0
})
df.head()

Unnamed: 0,feedback,sentiment
0,teachers supportive,1
1,faculty explains concepts clearly,1
2,library facilities excellent,1
3,classrooms clean spacious,1
4,campus environment peaceful,1


# **Convert Text into Numbers & Train ML Model**

**Split Dataset into Train & Test**

In [None]:
from sklearn.model_selection import train_test_split

#features and labels
X=df['feedback']  #feedback test
y=df['sentiment'] # 1-positive, 0-negative

#split dataset (80% train, 20%train)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))

Training samples: 76
Testing samples: 19


**Convert Text to Numbers (TF-IDF)**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

#convert TF-IDE vectorizer
tfidf = TfidfVectorizer(max_features=5000)  #consider top 5000 words

#Fit on trsining data & transform both train & test
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

print("TF-IDE shape:", X_train_tfidf.shape)

TF-IDE shape: (76, 157)


**Train Logistic Regression Model**

In [None]:
from sklearn.linear_model import LogisticRegression

#create and train model
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

print("Model trained successfully!")

Model trained successfully!


**Test Model Accuracy**

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

#predict on test set
y_pred = model.predict(X_test_tfidf)

#Accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))

#confusion matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

#detailed classification report
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.3684210526315789
Confusion Matrix:
 [[2 4]
 [8 5]]
Classification Report:
               precision    recall  f1-score   support

           0       0.20      0.33      0.25         6
           1       0.56      0.38      0.45        13

    accuracy                           0.37        19
   macro avg       0.38      0.36      0.35        19
weighted avg       0.44      0.37      0.39        19



**Test Model on New Feedback**

In [None]:
#Example new feedback
new_feedback = ["The hostel food is horrible", "Teachers are amazing and helpful"]

#Transform text using same TF-IDF
new_tfidf = tfidf.transform(new_feedback)

#Predict sentiment
prediction = model.predict(new_tfidf)
pred_labels = ['Positive' if p==1 else 'Negative' for p in prediction]

for feedback, label in zip(new_feedback, pred_labels):
  print(f"Feedback: {feedback} → Sentiment: {label}")

Feedback: The hostel food is horrible → Sentiment: Negative
Feedback: Teachers are amazing and helpful → Sentiment: Positive


# **Save the Model & TF-IDF Vectorizer**

In [None]:
import joblib

# Save trained model
joblib.dump(model, "/content/drive/MyDrive/ML_Project/sentiment_model.pkl")

# Save TF-IDF vectorizer
joblib.dump(tfidf, "/content/drive/MyDrive/ML_Project/tfidf_vectorizer.pkl")

print("Model and TF-IDF vectorizer saved successfully!")


Model and TF-IDF vectorizer saved successfully!


**Verify Saved Files**

In [None]:
!ls /content/drive/MyDrive/ML_Project


sentiment_model.pkl  tfidf_vectorizer.pkl


**Load Saved Model & Vectorizer**

In [None]:
# Load saved model and vectorizer
loaded_model = joblib.load("/content/drive/MyDrive/ML_Project/sentiment_model.pkl")
loaded_tfidf = joblib.load("/content/drive/MyDrive/ML_Project/tfidf_vectorizer.pkl")

print("Model and vectorizer loaded successfully!")


Model and vectorizer loaded successfully!


**Test Loaded Model on New Feedback**

In [None]:
test_feedback = ["The college infrastructure is excellent",
                 "Hostel food quality is very poor"]

test_tfidf = loaded_tfidf.transform(test_feedback)
predictions = loaded_model.predict(test_tfidf)

for fb, pred in zip(test_feedback, predictions):
    print(fb, "→", "Positive" if pred == 1 else "Negative")


The college infrastructure is excellent → Positive
Hostel food quality is very poor → Negative


# **Create a Simple User Input System (Prediction)**

**User Input Prediction**

In [None]:
def predict_sentiment(text):
    text_tfidf = tfidf.transform([text])
    prediction = model.predict(text_tfidf)
    return prediction[0]

# Test
user_feedback = input("Enter college feedback: ")
print("Predicted Sentiment:", predict_sentiment(user_feedback))


Enter college feedback: Hostel food is very bad
Predicted Sentiment: 0


# **MINI WEB APP USING GRADIO**

In [5]:
!pip install gradio joblib scikit-learn



In [6]:
!pwd

/content


In [11]:
import gradio as gr
import joblib

# Load model and vectorizer
model = joblib.load("sentiment_model.pkl")
tfidf = joblib.load("tfidf_vectorizer.pkl")

def predict_sentiment(text):
    vec = tfidf.transform([text])
    pred = model.predict(vec)[0]

    if pred == 1:
        return "Positive"
    else:
        return "Negative"

gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=4, placeholder="Enter college feedback here..."),
    outputs="text",
    title="College Feedback Sentiment Analysis",
    description="Predict whether the feedback is Positive or Negative"
).launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://aed1f004d94919c7f8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


