# LOAD DATASET


In [18]:
import pandas as pd

# Read the CSV, keep only the 'channel_info' column and drop rows where it is
# missing. The result stays a one-column DataFrame named `df`.
df = (
    pd.read_csv("/content/top_insta_influencers_data.csv")
    [['channel_info']]
    .dropna()
)
print(df.head())

  channel_info
0    cristiano
1  kyliejenner
2     leomessi
3  selenagomez
4      therock


# CREATE SENTIMENT

In [19]:
# Keywords whose presence anywhere in the text marks it as positive.
positive_words = [
    'official',
    'fitness',
    'love',
    'music',
    'actor',
    'cricket',
    'football',
    'fashion',
    'model',
    'star',
]


def get_sentiment(text):
    """Return 1 if any positive keyword occurs in ``text``, otherwise 0.

    ``text`` is coerced to ``str`` and lower-cased first. Matching is by
    substring, not by whole word, so a keyword embedded in a longer token
    (e.g. 'star' inside 'superstar') also counts.
    """
    lowered = str(text).lower()
    has_keyword = any(keyword in lowered for keyword in positive_words)
    return 1 if has_keyword else 0

# Label every channel name with get_sentiment (1 = positive, 0 = negative)
# and store the labels as a new 'sentiment' column.
sentiment_labels = df['channel_info'].apply(get_sentiment)
df['sentiment'] = sentiment_labels

# TOKENIZATION

In [20]:
def tokenize(text):
    """Coerce ``text`` to ``str``, lower-case it and split on whitespace.

    Returns a list of tokens; an empty or whitespace-only input gives ``[]``.
    """
    lowered = str(text).lower()
    return lowered.split()


# MANUAL VOCABULARY

In [21]:
# Token -> integer id lookup. Id 0 is reserved for the padding token.
word2idx = {"<pad>": 0}
idx = 1  # next free id

# Walk every channel name in order and give each previously unseen token the
# next id, so ids follow first-appearance order.
for tokens in map(tokenize, df['channel_info']):
    for word in tokens:
        if word in word2idx:
            continue
        word2idx[word] = idx
        idx += 1

# Number of distinct ids, padding included.
vocab_size = len(word2idx)


# ENCODE + PADDING

In [22]:
import torch

# Every encoded text is truncated or padded to exactly this many ids.
MAX_LEN = 20


def encode(text):
    """Turn ``text`` into a tensor of ``MAX_LEN`` token ids.

    The text is tokenized with ``tokenize`` and each token is looked up in
    ``word2idx``; tokens missing from the vocabulary get id 0, the same id
    used for padding. Sequences longer than ``MAX_LEN`` are cut, shorter ones
    are right-padded with 0.
    """
    ids = [word2idx.get(token, 0) for token in tokenize(text)][:MAX_LEN]
    padding = [0] * (MAX_LEN - len(ids))
    return torch.tensor(ids + padding)


# DATASET CLASS

In [23]:
from torch.utils.data import Dataset

class InstaSentimentDataset(Dataset):
    """Dataset pairing raw texts with their sentiment labels.

    Texts are kept as given and encoded on access, so each item is a tuple
    ``(encode(text), label)`` where the label is a float32 scalar tensor.
    """

    def __init__(self, texts, labels):
        # Both sequences are indexed with the same position in __getitem__.
        self.texts, self.labels = texts, labels

    def __len__(self):
        """Number of samples, taken from the length of ``texts``."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the encoded text and float label stored at position ``idx``."""
        features = encode(self.texts[idx])
        target = torch.tensor(self.labels[idx], dtype=torch.float32)
        return features, target


# TRAIN-TEST SPLIT

In [24]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
# NOTE: X_test / y_test are produced here but not used in the cells shown.
X_train, X_test, y_train, y_test = train_test_split(
    df['channel_info'],
    df['sentiment'],
    test_size=0.2,
    random_state=42,
)

# Pass the underlying arrays so the dataset is indexed by position 0..n-1
# rather than by the original DataFrame index labels.
train_data = InstaSentimentDataset(X_train.values, y_train.values)
train_loader = DataLoader(
    train_data,
    batch_size=16,
    shuffle=True,
)


# SIMPLE ANN MODEL

In [25]:
import torch.nn as nn

class SentimentModel(nn.Module):
    """Embedding -> flatten -> single linear unit -> sigmoid.

    Takes a batch of id sequences of length ``MAX_LEN`` and returns one value
    in (0, 1) per sequence, shaped ``(batch, 1)``.
    """

    def __init__(self, vocab_size):
        super().__init__()
        embed_dim = 32
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # The flattened input has one embed_dim-sized vector per position.
        self.fc = nn.Linear(embed_dim * MAX_LEN, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of encoded sequences."""
        embedded = self.embedding(x)
        # Collapse (batch, MAX_LEN, 32) into (batch, MAX_LEN * 32).
        flat = embedded.view(embedded.shape[0], -1)
        score = self.fc(flat)
        return self.sigmoid(score)


# TRAINING

In [26]:
model = SentimentModel(vocab_size)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# predict_sentiment() switches the model to eval mode; set training mode
# explicitly so this cell behaves the same whenever it is (re-)run.
model.train()

for epoch in range(3):
    total_loss = 0.0  # sum of the per-batch losses for this epoch
    for text, label in train_loader:
        optimizer.zero_grad()
        # Drop only the trailing feature dimension: (batch, 1) -> (batch,).
        # A bare .squeeze() would also remove the batch dimension when the
        # final batch holds a single sample, and BCELoss would then reject the
        # 0-d prediction against the 1-element label tensor.
        output = model(text).squeeze(1)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 2.1316
Epoch 2, Loss: 0.7103
Epoch 3, Loss: 0.7638


# PREDICTION

In [27]:
def predict_sentiment(text):
    """Classify ``text`` with the trained model and return a display label.

    The text is encoded with ``encode``, given a batch dimension and scored by
    ``model`` in eval mode with gradients disabled. Returns "Positive 😊" when
    the model output is above 0.5, otherwise "Negative 😞".

    Note: ``encode`` maps tokens that are not in ``word2idx`` to 0, the
    padding id, so a text made up only of unseen tokens is scored exactly
    like an all-padding sequence.
    """
    model.eval()
    with torch.no_grad():
        # unsqueeze(0) turns the (MAX_LEN,) id vector into a batch of one.
        encoded = encode(text).unsqueeze(0)
        output = model(encoded)
        # The emoji are real Unicode characters; the earlier literals were
        # their UTF-8 bytes mis-decoded as a single-byte encoding.
        return "Positive 😊" if output.item() > 0.5 else "Negative 😞"

# Try the classifier on two hand-written examples.
for sample in ("official fitness model", "random channel page"):
    print(predict_sentiment(sample))


Negative 😞
Negative 😞
