# SET UP OpenAI API KEYS

In [16]:
import os

from skllm.config import SKLLMConfig

# Credentials are read from the environment instead of being written into the
# notebook, so a saved or shared copy never contains real secrets.
# A missing variable raises KeyError naming the variable that must be set.
SKLLMConfig.set_openai_key(os.environ["OPENAI_API_KEY"])
SKLLMConfig.set_openai_org(os.environ["OPENAI_ORG_ID"])

# ZERO-SHOT CLASSIFICATION (labels taken from the training split)

In [17]:
from skllm import ZeroShotGPTClassifier
from skllm.datasets import get_classification_dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Load the sentiment analysis dataset and hold out a test split
SPLIT_SEED = 1203  # fixed seed so the train/test partition is repeatable
X, y = get_classification_dataset()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=SPLIT_SEED,
)

# Build the GPT-backed classifier, fit it on the training split,
# then label the held-out texts
clf = ZeroShotGPTClassifier(openai_model="gpt-3.5-turbo")
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

100%|██████████| 8/8 [00:08<00:00,  1.08s/it]


In [43]:
# Print every held-out sentence next to its predicted label.
# zip() pairs the two sequences directly, so the loop stays inside the test
# split; indexing up to len(X) (the full dataset) would run past the end of
# X_test and y_pred and raise IndexError.
for sentence, predicted_label in zip(X_test, y_pred):
    print(f'Oracion: {sentence},\nclasificacion: {predicted_label}\n')

# Overall accuracy of the predictions against the true test labels
print(f'\nThe accuracy score is : {accuracy_score(y_test, y_pred):.2f}')

Oracion: 'Through the Storm' was not bad, but it wasn't great either. The storyline was somewhat predictable, and the characters were somewhat stereotypical. It was an average movie at best.,
clasificacion: neutral

Oracion: 'The Fall of the Phoenix' was a letdown. The storyline was confusing and the characters were one-dimensional. I found myself checking my watch multiple times throughout the movie.,
clasificacion: negative

Oracion: I was thoroughly disappointed with 'Silver Shadows'. The plot was confusing and the performances were lackluster. I wouldn't recommend wasting your time on this one.,
clasificacion: negative

Oracion: The screenwriting in 'Under the Willow Tree' was superb. The dialogue felt real and the characters were well-rounded. The performances were also fantastic. I haven't enjoyed a movie this much in a while.,
clasificacion: positive

Oracion: The special effects in 'Star Battles: Nebula Conflict' were out of this world. I felt like I was actually in space. The 

# ZERO-SHOT CLASSIFICATION (No labels available)

In [39]:
# Candidate classes for the model; no labelled examples are used in this cell
candidate_labels = ['positive', 'negative', 'neutral']

zero_shot_classifier = ZeroShotGPTClassifier(openai_model="gpt-3.5-turbo")
# There is no training data, so fit() gets None for X and only the label list
zero_shot_classifier.fit(None, candidate_labels)

# Classify the held-out texts and show the predictions as the cell output
y_pred_zero_shot = zero_shot_classifier.predict(X_test)
y_pred_zero_shot

100%|██████████| 8/8 [00:07<00:00,  1.10it/s]


['neutral',
 'negative',
 'negative',
 'positive',
 'positive',
 'neutral',
 'negative',
 'negative']

In [41]:
# Print every held-out sentence next to its zero-shot predicted label.
# zip() pairs the two sequences directly, so the loop stays inside the test
# split; indexing up to len(X) (the full dataset) would run past the end of
# X_test and y_pred_zero_shot and raise IndexError.
for sentence, predicted_label in zip(X_test, y_pred_zero_shot):
    print(f'Oracion: {sentence},\nclasificacion: {predicted_label}\n')

# Overall accuracy of the zero-shot predictions against the true test labels
print(f'\nThe accuracy score is : {accuracy_score(y_test, y_pred_zero_shot):.2f}')

Oracion: 'Through the Storm' was not bad, but it wasn't great either. The storyline was somewhat predictable, and the characters were somewhat stereotypical. It was an average movie at best.,
clasificacion: neutral

Oracion: 'The Fall of the Phoenix' was a letdown. The storyline was confusing and the characters were one-dimensional. I found myself checking my watch multiple times throughout the movie.,
clasificacion: negative

Oracion: I was thoroughly disappointed with 'Silver Shadows'. The plot was confusing and the performances were lackluster. I wouldn't recommend wasting your time on this one.,
clasificacion: negative

Oracion: The screenwriting in 'Under the Willow Tree' was superb. The dialogue felt real and the characters were well-rounded. The performances were also fantastic. I haven't enjoyed a movie this much in a while.,
clasificacion: positive

Oracion: The special effects in 'Star Battles: Nebula Conflict' were out of this world. I felt like I was actually in space. The 

# TEXT VECTORIZATION (ada-002)

In [45]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

from skllm.preprocessing import GPTVectorizer

In [48]:
# Map the string labels 'positive', 'negative', 'neutral' to integer ids.
# The encoder is fitted on the training labels, then applied to both splits.
le = LabelEncoder().fit(y_train)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)

# Show both encoded arrays as the cell output
(y_train_encoded, y_test_encoded)

(array([2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 1, 0, 1, 1, 1, 0, 1, 0, 2, 1, 1]),
 array([1, 0, 0, 2, 2, 1, 1, 0]))

In [49]:
# Pipeline: turn each text into an embedding with GPTVectorizer, then classify
# the embeddings with a random forest.
# The forest is seeded so its own randomness no longer changes the fitted model
# (and therefore the reported accuracy) from one run of the notebook to the next.
steps = [('GPT_VETORIZATION', GPTVectorizer()), # embedding of dim 1536
         ('Clf', RandomForestClassifier(random_state=1203))]

clf = Pipeline(steps)

# Fit on the training texts against the integer-encoded labels
clf.fit(X_train, y_train_encoded)

100%|██████████| 22/22 [00:07<00:00,  2.88it/s]


In [51]:
# Run the fitted pipeline on the held-out texts; the predictions come back in
# the same integer encoding the pipeline was fitted on (y_train_encoded)
y_pred_encoded = clf.predict(X_test)

100%|██████████| 8/8 [00:03<00:00,  2.35it/s]


In [52]:
# Accuracy computed on the integer-encoded labels; shown as the cell output
accuracy_score(y_test_encoded, y_pred_encoded)

0.875

In [54]:
# Convert the predicted integer ids back to their original string labels
# using the same LabelEncoder that produced the encoding
y_pred_rf=le.inverse_transform(y_pred_encoded)
# Show the decoded predictions as the cell output
y_pred_rf

array(['neutral', 'negative', 'negative', 'positive', 'positive',
       'neutral', 'neutral', 'neutral'], dtype='<U8')

In [58]:
# Print every held-out sentence with its predicted and true label.
# zip() walks the three test-split sequences together, so the loop stays
# inside the test split; indexing up to len(X) (the full dataset) would run
# past the end of X_test, y_pred_rf and y_test and raise IndexError.
for sentence, predicted_label, true_label in zip(X_test, y_pred_rf, y_test):
    print(f'Oracion: {sentence},\nclasificacion: {predicted_label}\ntrue label: {true_label}\n')

# Overall accuracy of the decoded predictions against the true test labels
print(f'\nThe accuracy score is : {accuracy_score(y_test, y_pred_rf):.2f}')

Oracion: 'Through the Storm' was not bad, but it wasn't great either. The storyline was somewhat predictable, and the characters were somewhat stereotypical. It was an average movie at best.,
clasificacion: neutral
true label: neutral

Oracion: 'The Fall of the Phoenix' was a letdown. The storyline was confusing and the characters were one-dimensional. I found myself checking my watch multiple times throughout the movie.,
clasificacion: negative
true label: negative

Oracion: I was thoroughly disappointed with 'Silver Shadows'. The plot was confusing and the performances were lackluster. I wouldn't recommend wasting your time on this one.,
clasificacion: negative
true label: negative

Oracion: The screenwriting in 'Under the Willow Tree' was superb. The dialogue felt real and the characters were well-rounded. The performances were also fantastic. I haven't enjoyed a movie this much in a while.,
clasificacion: positive
true label: positive

Oracion: The special effects in 'Star Battles: