In [None]:
import re

class CRFSentimentAnalyzer:
    """Token-level label classifier trained with perceptron-style updates.

    Every token is scored independently from its own feature dict; the class
    keeps no label-transition weights. ``weights`` maps
    ``label -> {feature_name: weight}``.
    """

    def __init__(self, learning_rate=0.01, max_iterations=1000):
        """Store the hyper-parameters and start with an empty weight table.

        Args:
            learning_rate: Step added to / subtracted from a weight on each
                misclassification.
            max_iterations: Stored on the instance only; ``train`` makes a
                single pass over the data and does not read this value.
        """
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.weights = {}

    def train(self, dataset):
        """Run one pass of mistake-driven weight updates over ``dataset``.

        Args:
            dataset: Iterable of ``(sentence, labels)`` pairs, where both
                items are equally long sequences of strings.
        """
        dataset = list(dataset)

        # Score every token against ALL known labels (previously learned ones
        # plus everything in this dataset, in first-seen order). Restricting
        # the candidates to the gold labels of the current sentence would let
        # a sentence with a single label type never trigger an update, and
        # would not match classify(), which has no gold labels available.
        label_order = dict.fromkeys(self.weights)
        for _, labels in dataset:
            label_order.update(dict.fromkeys(labels))
        candidates = list(label_order)

        for sentence, labels in dataset:
            for i in range(len(sentence)):
                features = self.extract_features(sentence, i)
                predicted_label = self.predict(features, candidates)
                true_label = labels[i]
                if predicted_label != true_label:
                    self.update_weights(features, predicted_label, true_label)

    def extract_features(self, sentence, i):
        """Build the binary feature dict for the token at index ``i``.

        Features: the word itself, the words in a one-token window on either
        side keyed by their offset RELATIVE to ``i`` (``word-1``, ``word+0``,
        ``word+1``), whether the word starts with an upper-case character and
        whether it starts with one of ``, . ! ? ;``.

        Returns:
            dict mapping feature name to ``1``.
        """
        features = {}
        word = sentence[i]
        features[f'word:{word}'] = 1
        for offset in (-1, 0, 1):
            j = i + offset
            if 0 <= j < len(sentence):
                # Relative offset, not the absolute index, so the same
                # neighbouring word yields the same feature anywhere in a
                # sentence.
                features[f'word{offset:+d}:{sentence[j]}'] = 1
        # word[:1] is '' for an empty token, which is simply "not upper-case"
        # instead of an IndexError.
        features[f'is_capitalized:{word[:1].isupper()}'] = 1
        features[f'is_punctuation:{re.match(r"^[,.!?;]", word) is not None}'] = 1
        return features

    def predict(self, features, labels):
        """Return the label in ``labels`` with the highest linear score.

        Ties are resolved in favour of the label that appears first in
        ``labels``.

        Raises:
            ValueError: if ``labels`` is empty (e.g. the model is untrained).
        """
        scores = {}
        for label in labels:
            label_weights = self.weights.get(label, {})
            # Walk the handful of active features rather than every weight
            # stored for the label.
            scores[label] = sum(
                value * label_weights.get(name, 0)
                for name, value in features.items()
            )
        if not scores:
            raise ValueError(
                "no candidate labels to score; train the model before predicting"
            )
        return max(scores, key=scores.get)

    def update_weights(self, features, predicted_label, true_label):
        """Reward ``true_label`` and penalise ``predicted_label`` for ``features``."""
        reward = self.weights.setdefault(true_label, {})
        penalty = self.weights.setdefault(predicted_label, {})

        for feature, value in features.items():
            step = self.learning_rate * value
            reward[feature] = reward.get(feature, 0) + step
            penalty[feature] = penalty.get(feature, 0) - step

    def classify(self, sentence):
        """Predict one label per token of ``sentence``.

        Candidate labels are the labels that currently have weights.

        Raises:
            ValueError: if ``sentence`` is non-empty and no weights have been
                learned yet.
        """
        candidates = list(self.weights)
        return [
            self.predict(self.extract_features(sentence, i), candidates)
            for i in range(len(sentence))
        ]


def extract_data(data):
    """Group labelled token lines into per-sentence ``(words, labels)`` pairs.

    Args:
        data: Iterable of lines without trailing newlines. A non-empty line
            is ``"<word> <label>"``; an empty line ends the current sentence.

    Returns:
        list of ``(words, labels)`` tuples, each holding two parallel lists.

    Raises:
        ValueError: if a non-empty line contains no space separator.
    """
    sentences = []
    words, labels = [], []
    for line in data:
        if line == "":
            sentences.append((words, labels))
            words, labels = [], []
            continue
        # Split on the LAST space so a token that itself contains spaces
        # keeps its full text and the label is always the final field.
        word, separator, label = line.rpartition(" ")
        if not separator:
            raise ValueError(f"malformed line {line!r}: expected '<word> <label>'")
        words.append(word)
        labels.append(label)
    # Keep the final sentence when the input does not end with a blank line.
    if words:
        sentences.append((words, labels))
    return sentences

def extract_data_test(data):
    """Group unlabelled token lines into sentences.

    Args:
        data: Iterable of lines without trailing newlines; an empty line ends
            the current sentence.

    Returns:
        list of sentences, each a list of the lines it contains.
    """
    sentences = []
    current = []
    for line in data:
        if line == "":
            sentences.append(current)
            current = []
        else:
            current.append(line)
    # Keep the final sentence when the input does not end with a blank line.
    if current:
        sentences.append(current)
    return sentences

# Load the labelled training data, one list entry per line.
# Forward slashes are accepted by open() on Windows as well as POSIX systems.
# NOTE(review): no encoding is passed to open(), so the platform default is
# used — confirm it matches the encoding of the data files.
with open('Data/ES/train') as f:
    data = f.read().splitlines()

dataset = extract_data(data)

# Initialize and train the sentiment analyzer
analyzer = CRFSentimentAnalyzer()
analyzer.train(dataset)

# Load the unlabelled sentences to tag
with open('Data/ES/dev.in') as f:
    test_data = f.read().splitlines()

test_sentences = extract_data_test(test_data)

# Write one "<token> <label>" line per token and a blank line after every
# sentence.
with open('Data/ES/dev.p4.out', "w") as f:
    for sentence in test_sentences:
        predicted_labels = analyzer.classify(sentence)
        for token, label in zip(sentence, predicted_labels):
            f.write(f"{token} {label}\n")
        f.write("\n")

In [None]:
class NaiveBayes:
    """Per-word tagger built from tag frequencies and per-tag word counts."""

    def __init__(self):
        # word_count[tag][word] -> number of times `word` was seen with `tag`
        self.word_count = {}
        # class_count[tag] -> number of training lines carrying `tag`
        self.class_count = {}
        # every distinct word seen during fit()
        self.total_words = set()

    def fit(self, data):
        """Accumulate counts from newline-separated ``"<word> <tag>"`` text.

        Lines that do not split into exactly two whitespace-separated fields
        (blank lines included) are ignored.
        """
        for raw_line in data.split('\n'):
            fields = raw_line.split()
            if len(fields) != 2:
                continue

            token, label = fields
            self.total_words.add(token)
            self.class_count[label] = self.class_count.get(label, 0) + 1

            per_label = self.word_count.setdefault(label, {})
            per_label[token] = per_label.get(token, 0) + 1

    def predict(self, sentence):
        """Return the best-scoring tag for each whitespace-separated word.

        A tag's score is its count multiplied by the add-one smoothed
        likelihood of the word under that tag. The earliest-seen tag wins a
        tie, and ``'Outside'`` is returned when no tags have been learned.
        """
        vocabulary_size = len(self.total_words)

        def score(tag, token):
            tag_total = self.class_count[tag]
            likelihood = (self.word_count[tag].get(token, 0) + 1) / (tag_total + vocabulary_size)
            return tag_total * likelihood

        return [
            max(self.class_count, key=lambda tag: score(tag, token), default='Outside')
            for token in sentence.split()
        ]

# Example usage: fit on the raw training file and tag a single word.
# Forward slashes are accepted by open() on Windows as well as POSIX systems.
with open('Data/ES/train') as f:
    data = f.read()

classifier = NaiveBayes()
classifier.fit(data)

# Prints a one-element list with the tag chosen for "Risotto"; which tag that
# is depends on the contents of the training file.
print(classifier.predict("Risotto"))


In [8]:
class CRFSentimentAnalyzer:
    """Token-level label classifier using only the word identity as a feature.

    Every token is scored independently; the class keeps no label-transition
    weights. ``weights`` maps ``label -> {feature_name: weight}``.
    """

    def __init__(self, learning_rate=0.01, max_iterations=1000):
        """Store the hyper-parameters and start with an empty weight table.

        ``max_iterations`` is only stored; ``train`` makes a single pass over
        the data and does not read it.
        """
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.weights = {}

    def train(self, dataset):
        """Run one pass of mistake-driven weight updates over ``dataset``.

        Args:
            dataset: Iterable of ``(sentence, labels)`` pairs, where both
                items are equally long sequences of strings.
        """
        dataset = list(dataset)

        # Compete over every known label (already-learned ones plus all labels
        # in this dataset, first-seen order). Using only the current
        # sentence's gold labels as candidates would mean a sentence with one
        # label type can never produce an update, and would differ from
        # classify(), where no gold labels exist.
        label_order = dict.fromkeys(self.weights)
        for _, labels in dataset:
            label_order.update(dict.fromkeys(labels))
        candidates = list(label_order)

        for sentence, labels in dataset:
            for i in range(len(sentence)):
                features = self.extract_features(sentence, i)
                predicted_label = self.predict(features, candidates)
                true_label = labels[i]
                if predicted_label != true_label:
                    self.update_weights(features, predicted_label, true_label)

    def extract_features(self, sentence, i):
        """Return ``{"word:<token>": 1}`` for the token at index ``i``."""
        return {f'word:{sentence[i]}': 1}

    def predict(self, features, labels):
        """Return the label in ``labels`` with the highest linear score.

        Ties are resolved in favour of the label that appears first in
        ``labels``.

        Raises:
            ValueError: if ``labels`` is empty (e.g. the model is untrained).
        """
        scores = {}
        for label in labels:
            label_weights = self.weights.get(label, {})
            # Look up only the active features instead of scanning every
            # weight stored for the label.
            scores[label] = sum(
                value * label_weights.get(name, 0)
                for name, value in features.items()
            )
        if not scores:
            raise ValueError(
                "no candidate labels to score; train the model before predicting"
            )
        return max(scores, key=scores.get)

    def update_weights(self, features, predicted_label, true_label):
        """Reward ``true_label`` and penalise ``predicted_label`` for ``features``."""
        reward = self.weights.setdefault(true_label, {})
        penalty = self.weights.setdefault(predicted_label, {})

        for feature, value in features.items():
            step = self.learning_rate * value
            reward[feature] = reward.get(feature, 0) + step
            penalty[feature] = penalty.get(feature, 0) - step

    def classify(self, sentence):
        """Predict one label per token of ``sentence``.

        Candidate labels are the labels that currently have weights.

        Raises:
            ValueError: if ``sentence`` is non-empty and no weights have been
                learned yet.
        """
        candidates = list(self.weights)
        return [
            self.predict(self.extract_features(sentence, i), candidates)
            for i in range(len(sentence))
        ]

# Given dataset
# ...

# Initialize and train the sentiment analyzer
# ...

# Test sentences
# ...

# Given dataset

def extract_data(data):
    """Group labelled token lines into per-sentence ``(words, labels)`` pairs.

    Args:
        data: Iterable of lines without trailing newlines. A non-empty line
            is ``"<word> <label>"``; an empty line ends the current sentence.

    Returns:
        list of ``(words, labels)`` tuples, each holding two parallel lists.

    Raises:
        ValueError: if a non-empty line contains no space separator.
    """
    grouped = []
    tokens, tags = [], []
    for line in data:
        if line:
            # Split on the LAST space so a token that itself contains spaces
            # keeps its full text and the label is always the final field.
            token, separator, tag = line.rpartition(" ")
            if not separator:
                raise ValueError(
                    f"malformed line {line!r}: expected '<word> <label>'"
                )
            tokens.append(token)
            tags.append(tag)
        else:
            grouped.append((tokens, tags))
            tokens, tags = [], []
    # Keep the final sentence when the input does not end with a blank line.
    if tokens:
        grouped.append((tokens, tags))
    return grouped

def extract_data_test(data):
    """Group unlabelled token lines into sentences.

    Args:
        data: Iterable of lines without trailing newlines; an empty line ends
            the current sentence.

    Returns:
        list of sentences, each a list of the lines it contains.
    """
    grouped = []
    tokens = []
    for line in data:
        if line:
            tokens.append(line)
        else:
            grouped.append(tokens)
            tokens = []
    # Keep the final sentence when the input does not end with a blank line.
    if tokens:
        grouped.append(tokens)
    return grouped

# Load the labelled training data, one list entry per line.
# Forward slashes are accepted by open() on Windows as well as POSIX systems.
# NOTE(review): no encoding is passed to open(), so the platform default is
# used — confirm it matches the encoding of the data files.
with open('Data/ES/train') as f:
    data = f.read().splitlines()

dataset = extract_data(data)

# Initialize and train the sentiment analyzer
analyzer = CRFSentimentAnalyzer()
analyzer.train(dataset)

# Load the unlabelled sentences to tag
with open('Data/ES/dev.in') as f:
    test_data = f.read().splitlines()

test_sentences = extract_data_test(test_data)

# Write one "<token> <label>" line per token and a blank line after every
# sentence.
with open('Data/ES/dev.p4.out', "w") as f:
    for sentence in test_sentences:
        predicted_labels = analyzer.classify(sentence)
        for token, label in zip(sentence, predicted_labels):
            f.write(f"{token} {label}\n")
        f.write("\n")