In [4]:
# Import necessary libraries
import pandas as pd
from textblob import TextBlob

# Step 1: Load the CSV file
df = pd.read_csv('D:\Github\SocialSentinel\data\d4\instagram_reach.csv')

# Step 2: Data Preprocessing
# Keep only relevant columns (Caption, Hashtags)
df = df[['Caption', 'Hashtags']]

# Remove any missing or NaN values
df.dropna(subset=['Caption', 'Hashtags'], inplace=True)

# Step 3: Sentiment Analysis Function
def analyze_sentiment(text):
    analysis = TextBlob(text)
    return analysis.sentiment.polarity  # Returns a score between -1 and 1

# Step 4: Calculate Sentiment Scores for Caption and Hashtags
df['caption_score'] = df['Caption'].apply(analyze_sentiment)
df['hashtag_score'] = df['Hashtags'].apply(analyze_sentiment)

# Step 5: Combine Scores to Determine Overall Sentiment
df['overall_score'] = (df['caption_score'] + df['hashtag_score']) / 2

# Step 6: Categorize Sentiment
def categorize_sentiment(score):
    if score < 0:
        return 'Negative'
    elif score > 0:
        return 'Positive'
    else:
        return 'Neutral'

df['sentiment'] = df['overall_score'].apply(categorize_sentiment)

# Step 7: Create a new DataFrame with required columns
final_df = df[['Caption', 'Hashtags', 'sentiment', 'overall_score']]

# Step 8: Save to a new CSV file
final_df.to_csv('instagram_reach_with_sentiments.csv', index=False)

print("Sentiment analysis complete! New file saved as 'instagram_reach_with_sentiments.csv'.")

OSError: [Errno 22] Invalid argument: 'D:\\Github\\SocialSentinel\test\\sentiments.csv'

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

# Step 1: Load the new CSV file
df = pd.read_csv('D:\Github\SocialSentinel\test\sentiments.csv')

# Step 2: Prepare Data for Machine Learning
X = df['Caption'] + " " + df['Hashtags']  # Features (text data)
y = df['sentiment']  # Labels (sentiment categories)

# Step 3: Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Step 4: Convert Text Data into Numerical Features
tfidf = TfidfVectorizer(max_features=5000)  # Adjust max_features as needed
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Step 5: Use Random Over Sampling to Balance Classes
ros = RandomOverSampler(random_state=42)
X_train_balanced, y_train_balanced = ros.fit_resample(X_train_tfidf, y_train)

print("Original training set shape:", Counter(y_train))
print("Balanced training set shape:", Counter(y_train_balanced))

# Step 6: Train a Machine Learning Model (Logistic Regression)
model = LogisticRegression(class_weight='balanced')  # Handle class imbalance by adjusting weights
model.fit(X_train_balanced, y_train_balanced)

# Step 7: Evaluate the Model
y_pred = model.predict(X_test_tfidf)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Step 8: Make Predictions on New Data
def predict_sentiment(new_caption, new_hashtags):
    new_text = new_caption + " " + new_hashtags
    new_tfidf = tfidf.transform([new_text])
    return model.predict(new_tfidf)[0]


# Example usage
new_caption = "hello"
new_hashtags = "#programming"
predicted_sentiment = predict_sentiment(new_caption, new_hashtags)
print(f"Predicted Sentiment: {predicted_sentiment}")

new_caption = "The Internet of Things : A Very Short Story."
new_hashtags = "#MachineLearning"
predicted_sentiment = predict_sentiment(new_caption, new_hashtags)
print(f"Predicted Sentiment: {predicted_sentiment}")

In [5]:
new_caption = "Tag him who promised to help you but he doesn't"
new_hashtags = "#MachineLearning"
predicted_sentiment = predict_sentiment(new_caption, new_hashtags)
print(f"Predicted Sentiment: {predicted_sentiment}")

Predicted Sentiment: Neutral


In [2]:
# Import necessary libraries
import pandas as pd
from textblob import TextBlob
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
# Using Random Forest for better accuracy
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

# Step 1: Load the CSV file
df = pd.read_csv('D:\Github\SocialSentinel\test\sentiments.csv')

# Step 2: Data Preprocessing
# Keep only relevant columns (Caption, Hashtags)
df = df[['Caption', 'Hashtags']]

# Remove any missing or NaN values
df.dropna(subset=['Caption', 'Hashtags'], inplace=True)

# Step 3: Sentiment Analysis Function
def analyze_sentiment(text):
    analysis = TextBlob(text)
    return analysis.sentiment.polarity 


# Step 4: Calculate Sentiment Scores for Caption and Hashtags
df['caption_score'] = df['Caption'].apply(analyze_sentiment)
df['hashtag_score'] = df['Hashtags'].apply(analyze_sentiment)

# Step 5: Combine Scores to Determine Overall Sentiment
df['overall_score'] = (df['caption_score'] + df['hashtag_score']) / 2

# Step 6: Categorize Sentiment

def categorize_sentiment(score):
    if score < 0:
        return 'Negative'
    elif score > 0:
        return 'Positive'
    else:
        return 'Neutral'


df['sentiment'] = df['overall_score'].apply(categorize_sentiment)

# Step 7: Create a new DataFrame with required columns
final_df = df[['Caption', 'Hashtags', 'sentiment', 'overall_score']]

# Step 8: Save to a new CSV file
final_df.to_csv('instagram_reach_with_sentiments.csv', index=False)

print("Sentiment analysis complete! New file saved as 'instagram_reach_with_sentiments.csv'.")

# Load the new CSV file for training the model
df = pd.read_csv('instagram_reach_with_sentiments.csv')

# Prepare Data for Machine Learning
X = df['Caption'] + " " + df['Hashtags']  # Features (text data)
y = df['sentiment']  # Labels (sentiment categories)

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Convert Text Data into Numerical Features
tfidf = TfidfVectorizer(max_features=5000)  # Adjust max_features as needed
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Use Random Over Sampling to Balance Classes
ros = RandomOverSampler(random_state=42)
X_train_balanced, y_train_balanced = ros.fit_resample(X_train_tfidf, y_train)

print("Original training set shape:", Counter(y_train))
print("Balanced training set shape:", Counter(y_train_balanced))

# Train a Machine Learning Model (Random Forest Classifier)
model = RandomForestClassifier(class_weight='balanced', random_state=42)
model.fit(X_train_balanced, y_train_balanced)

# Save the model and the TF-IDF vectorizer
with open('sentiment_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('tfidf_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfidf, vectorizer_file)

# Evaluate the Model
y_pred = model.predict(X_test_tfidf)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Make Predictions on New Data
def predict_sentiment(new_caption, new_hashtags):
    # Load the model and vectorizer
    with open('sentiment_model.pkl', 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    with open('tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
        loaded_vectorizer = pickle.load(vectorizer_file)

    new_text = new_caption + " " + new_hashtags
    new_tfidf = loaded_vectorizer.transform([new_text])
    return loaded_model.predict(new_tfidf)[0]

OSError: [Errno 22] Invalid argument: 'D:\\Github\\SocialSentinel\test\\sentiments.csv'

In [2]:
new_caption = "I am happy"
new_hashtags = "#MachineLearning"
predicted_sentiment = predict_sentiment(new_caption, new_hashtags)
print(f"Predicted Sentiment: {predicted_sentiment}")

Predicted Sentiment: Neutral


In [4]:
# Load Random Forest model
with open('sentiment_model.pkl', 'rb') as random_forest_model_file:
    random_forest_model = pickle.load(random_forest_model_file)