In [22]:
# Install boto3 and check if the installation was successful:
!pip install boto3
!pip show boto3
# Restart the kernel to ensure changes are picked up

Collecting boto3
  Downloading boto3-1.35.90-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore<1.36.0,>=1.35.90 (from boto3)
  Downloading botocore-1.35.90-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.11.0,>=0.10.0 (from boto3)
  Downloading s3transfer-0.10.4-py3-none-any.whl.metadata (1.7 kB)
Downloading boto3-1.35.90-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading botocore-1.35.90-py3-none-any.whl (13.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m106.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Downloading s3transfer-0.10.4-py3-none-any.whl (83 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.2/83.2 kB[0m [31m7.5 MB/s[0m eta [36m0:

In [23]:
import boto3

In [24]:
%pip install datasets



In [25]:
import warnings
# Install a blanket 'ignore' filter: no warning of any category is displayed
# from here on, which also hides library deprecation notices.
warnings.filterwarnings('ignore')

In [26]:
from datasets import Dataset
import pandas as pd


In [27]:
# Source CSV with one `review` text column and one `sentiment` label column.
IMDB_CSV_URL = 'https://raw.githubusercontent.com/laxmimerit/All-CSV-ML-Data-Files-Download/master/IMDB-Dataset.csv'

dataset = pd.read_csv(IMDB_CSV_URL)
dataset.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [28]:
# Wrap the pandas DataFrame in a Hugging Face `Dataset`.
data=Dataset.from_pandas(dataset)

In [29]:
# Build the 80/20 split straight from the DataFrame so this cell can be re-run
# safely, and fix the seed so the train/test partition is reproducible.
data=Dataset.from_pandas(dataset).train_test_split(test_size=0.2,seed=42)

In [30]:
# Check the class balance of the sentiment labels.
dataset['sentiment'].value_counts()

Unnamed: 0_level_0,count
sentiment,Unnamed: 1_level_1
positive,25000
negative,25000


In [31]:
# Start with two separate, empty mappings (label name -> id and id -> label name).
label2id = {}
id2label = {}

In [32]:
# Sentiment string -> class id, plus the inverse mapping derived from it.
label2id = dict(negative=0, positive=1)
id2label = {class_id: name for name, class_id in label2id.items()}

In [33]:
def _sentiment_to_label(example):
  """Return the integer `label` for one example's `sentiment` string."""
  return {'label': label2id[example['sentiment']]}

# Add a `label` column to every example of every split.
data = data.map(_sentiment_to_label)

Map:   0%|          | 0/40000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [34]:
from transformers import AutoTokenizer

In [35]:
import torch

In [36]:

# Use the GPU when one is available, otherwise fall back to the CPU.
# The device name is chosen first and wrapped in a single torch.device call.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [37]:
# Checkpoint to fine-tune, and the tokenizer loaded from that same checkpoint
# (fast implementation requested explicitly).
model_ckpt='huawei-noah/TinyBERT_General_4L_312D'
tokenizer=AutoTokenizer.from_pretrained(model_ckpt,use_fast=True)

In [38]:
def tokenize(batch):
  """Tokenize the `review` texts of `batch` with the module-level tokenizer.

  Padding and truncation are both enabled, with a maximum length of 300.
  Returns whatever the tokenizer call returns.
  """
  return tokenizer(
      batch['review'],
      max_length=300,
      truncation=True,
      padding=True,
  )

In [39]:
# Apply `tokenize` to both splits in batched mode.
# NOTE(review): per the datasets docs, batch_size=None passes each split to
# `tokenize` as one single batch — confirm this is intended for memory use.
data=data.map(tokenize,batched=True,batch_size=None)

Map:   0%|          | 0/40000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [40]:
# Inspect which fields a training example carries after tokenization.
data['train'][0].keys()

dict_keys(['review', 'sentiment', 'label', 'input_ids', 'token_type_ids', 'attention_mask'])

In [41]:
# Names of the tokenizer-produced fields.
# NOTE(review): `columns` is not referenced anywhere else in the visible
# notebook — possibly a leftover; confirm before removing.
columns=['input_ids','token_type_ids','attention_mask']

In [43]:
!pip install evaluate
import evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [44]:
# Load the accuracy metric; `compute_metrics` reads it as a module-level global.
accuracy=evaluate.load('accuracy')

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [45]:
import numpy as np

In [46]:
def compute_metrics(eval_pred):
  """Score one evaluation pass with the module-level `accuracy` metric.

  `eval_pred` unpacks into (predictions, labels). The predicted class is the
  argmax of the predictions along axis 1; the return value is whatever
  `accuracy.compute` returns for those classes against the labels.
  """
  scores, references = eval_pred
  predicted_classes = np.argmax(scores, axis=1)
  return accuracy.compute(predictions=predicted_classes, references=references)

In [47]:

from transformers import AutoModelForSequenceClassification

In [48]:
import torch

In [50]:

# Load the checkpoint for sequence classification, sizing the number of labels
# from `label2id` and attaching both label mappings to the model config.
model=AutoModelForSequenceClassification.from_pretrained(model_ckpt,num_labels=len(label2id),id2label=id2label,label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [51]:
from transformers import TrainingArguments,Trainer

In [53]:
# Training configuration: outputs go to 'train_dir' (overwriting existing
# content), evaluation runs every epoch, logging happens every 50 steps, and
# training lasts 3 epochs with batch size 16 per device.
# NOTE(review): the recorded run of trainer.train() prompted for a Weights &
# Biases API key; if that logging is not wanted, passing report_to='none' here
# should disable it — confirm against the installed transformers version.
args=TrainingArguments(
    output_dir='train_dir',
    overwrite_output_dir=True,
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    logging_steps=50,
    num_train_epochs=3
    )

In [54]:
# Wire the model, training arguments, both dataset splits, the accuracy
# callback and the tokenizer into a single Trainer.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=data['train'],
    eval_dataset=data['test'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer
)

In [55]:
# Fine-tune the model using the Trainer configured above.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: [32m[41mERROR[0m API key must be 40 characters long, yours was 32


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3121,0.311591,0.8699
2,0.2274,0.288862,0.8864
3,0.2588,0.302435,0.8871


TrainOutput(global_step=7500, training_loss=0.29724363072713217, metrics={'train_runtime': 815.8157, 'train_samples_per_second': 147.092, 'train_steps_per_second': 9.193, 'total_flos': 1008210672000000.0, 'train_loss': 0.29724363072713217, 'epoch': 3.0})

In [57]:
# Evaluate on the eval split that was passed to the Trainer.
trainer.evaluate()

{'eval_loss': 0.302434504032135,
 'eval_accuracy': 0.8871,
 'eval_runtime': 27.2133,
 'eval_samples_per_second': 367.467,
 'eval_steps_per_second': 22.967,
 'epoch': 3.0}

In [66]:
# Save the fine-tuned model to a local directory, then load that directory into
# a sentiment-analysis pipeline placed on the previously selected `device`.
trainer.save_model('tinybert_sentiment_analysis')
from transformers import pipeline
classifier=pipeline('sentiment-analysis',model='tinybert_sentiment_analysis',device=device)

Device set to use cuda


In [58]:
import os

from huggingface_hub import login

# Never embed an access token in the notebook. Read it from the HF_TOKEN
# environment variable; when that is unset, `token` is None and `login`
# prompts for the token interactively instead.
# Any token that was ever committed in this cell must be treated as leaked
# and revoked in the Hugging Face account settings.
login(token=os.environ.get("HF_TOKEN"))

In [64]:

# NOTE(review): in the visible notebook `data` is the train/test split, which
# has no 'review' key, yet the recorded output is a single review string.
# `data` was presumably rebound to one example in a cell that is no longer
# present — restore that step (e.g. pick one row of data['test']) so this cell
# survives Restart & Run All. TODO confirm.
data['review']

'The movie was watchable while Nicolson was on the screen. However, I had to fight against passing out from boredom when the film depended on Meryl Streep to carry scenes without Jack; she was as bland as could be. The relationship between the characters was nothing special; these characters have been portrayed before -- and much better. It felt like a based-on-real-life scenario in the absolute worst sense: 90% of daily life is boring, and not worth writing about or watching. Why Ephron felt her life and relationship with Carl Bernstein was interesting enough to write about escapes me. Perhaps she wrote it as therapy -- for many writers, putting an episode from their life on paper is cathartic. Fine: but then why anyone in Hollywood felt this story was worth filming remains a mystery to me.'

In [67]:
# Classify one review with the local pipeline.
# NOTE(review): relies on `data` holding a single example with a 'review'
# field; no visible cell performs that rebinding — TODO confirm and restore.
classifier(data['review'])

[{'label': 'negative', 'score': 0.9938166737556458}]

In [69]:
# Upload the fine-tuned model to the Hub repository of this name.
# `token=True` uses the credential stored by the earlier login; it is the
# current spelling of the deprecated `use_auth_token` argument.
model.push_to_hub("tinybert_sentiment_analysis", token=True)


model.safetensors:   0%|          | 0.00/57.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Prahaladha/tinybert_sentiment_analysis/commit/de603f4056280f7482b9a32cf3ad8cf691737b4c', commit_message='Upload BertForSequenceClassification', commit_description='', oid='de603f4056280f7482b9a32cf3ad8cf691737b4c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Prahaladha/tinybert_sentiment_analysis', endpoint='https://huggingface.co', repo_type='model', repo_id='Prahaladha/tinybert_sentiment_analysis'), pr_revision=None, pr_num=None)

In [68]:
# Write the model (through the Trainer) and the tokenizer files into the same
# local directory.
trainer.save_model("tinybert_sentiment_analysis")
tokenizer.save_pretrained("tinybert_sentiment_analysis")


('tinybert_sentiment_analysis/tokenizer_config.json',
 'tinybert_sentiment_analysis/special_tokens_map.json',
 'tinybert_sentiment_analysis/vocab.txt',
 'tinybert_sentiment_analysis/added_tokens.json',
 'tinybert_sentiment_analysis/tokenizer.json')

In [70]:
# Save the tokenizer locally, then upload it to the Hub repository that already
# holds the model.
tokenizer.save_pretrained("tinybert_sentiment_analysis")
tokenizer.push_to_hub("tinybert_sentiment_analysis")


README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Prahaladha/tinybert_sentiment_analysis/commit/66cc5776233f369e135d9128d076b28ce280fb8f', commit_message='Upload tokenizer', commit_description='', oid='66cc5776233f369e135d9128d076b28ce280fb8f', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Prahaladha/tinybert_sentiment_analysis', endpoint='https://huggingface.co', repo_type='model', repo_id='Prahaladha/tinybert_sentiment_analysis'), pr_revision=None, pr_num=None)

In [71]:
# Smoke test of the published model: build a pipeline from the Hub repo id and
# classify one sentence.
from transformers import pipeline

classifier = pipeline("sentiment-analysis", model="Prahaladha/tinybert_sentiment_analysis")
result = classifier("The movie was fantastic!")
print(result)


config.json:   0%|          | 0.00/903 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/57.4M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.30k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cuda:0


[{'label': 'positive', 'score': 0.9931487441062927}]


In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the fine-tuned model and its tokenizer from the Hugging Face Hub
# repository (both come from the same repo id).
model = AutoModelForSequenceClassification.from_pretrained("Prahaladha/tinybert_sentiment_analysis")
tokenizer = AutoTokenizer.from_pretrained("Prahaladha/tinybert_sentiment_analysis")



In [1]:
import os
# Show the working directory, i.e. where relative paths used in this notebook resolve.
print("Current Working Directory:", os.getcwd())


Current Working Directory: c:\Users\bored\Music\Model_Deployment


In [4]:
# Write-permission check: creates (or overwrites) test.txt in the working directory.
with open("test.txt", "w") as f:
    f.write("Testing write permissions")


In [3]:
# Persist the loaded model and tokenizer into one local directory; this is the
# directory name later passed to upload_directory.
model.save_pretrained('tinybert_sentiment_analysis')
tokenizer.save_pretrained('tinybert_sentiment_analysis')


('tinybert_sentiment_analysis\\tokenizer_config.json',
 'tinybert_sentiment_analysis\\special_tokens_map.json',
 'tinybert_sentiment_analysis\\vocab.txt',
 'tinybert_sentiment_analysis\\added_tokens.json',
 'tinybert_sentiment_analysis\\tokenizer.json')

In [73]:
# Build a pipeline from a local directory on the selected `device`.
# NOTE(review): no visible cell creates 'Sentiment_analysis_download'; the
# model is saved to 'tinybert_sentiment_analysis' elsewhere in this notebook.
# Presumably the directory was produced outside the notebook — TODO confirm.
classifier=pipeline('sentiment-analysis',model='Sentiment_analysis_download',device=device)

Device set to use cuda


In [74]:
# Classify a single sentence with the pipeline built above.
classifier("The movie was fantastic!")

[{'label': 'positive', 'score': 0.9931487441062927}]

In [1]:
import boto3

In [2]:
# S3 client shared by the helpers below. No credentials or region are passed
# here, so boto3 has to find them in its ambient configuration.
s3=boto3.client('s3')

In [3]:
# Target bucket; the upload helpers read this as a module-level global.
bucket_name='tinybertsentimentanalysis'

In [4]:
def create_bucket(bucket_name, region=None):
  """Create the S3 bucket `bucket_name` with the module-level `s3` client.

  Args:
    bucket_name: Name of the bucket to create.
    region: Optional AWS region for the bucket. When omitted (or
      'us-east-1') the request carries no location constraint, exactly as
      before. Any other region is sent as the bucket's LocationConstraint,
      which S3 requires for buckets outside us-east-1.

  Errors raised by the underlying boto3 call are not caught.
  """
  if region is None or region == 'us-east-1':
    s3.create_bucket(Bucket=bucket_name)
  else:
    s3.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={'LocationConstraint': region},
    )
  print('Bucket is created')

In [5]:
# Create the target bucket defined above.
create_bucket(bucket_name)

Bucket is created


In [6]:
def upload_file(file_path,object_name=None):
  """Upload one local file to the module-level `bucket_name`.

  Args:
    file_path: Path of the local file to upload.
    object_name: S3 key to store the file under. Defaults to None, in which
      case the file's base name is used as the key.

  Errors raised by the underlying boto3 call are not caught.
  """
  if object_name is None:
    # Fall back to the bare file name when no key was given.
    object_name=os.path.basename(file_path)
  s3.upload_file(file_path,bucket_name,object_name)

In [7]:
import os

In [8]:
def upload_directory(directory_path,s3_prefix):
  """Upload every file found under `directory_path` to the global bucket.

  For each file, its path relative to `directory_path` is printed and then
  joined onto `s3_prefix`, with backslashes turned into forward slashes, to
  form the S3 object key.
  """
  for current_dir, _subdirs, filenames in os.walk(directory_path):
    for filename in filenames:
      local_path = os.path.join(current_dir, filename).replace('\\', '/')
      relative_path = os.path.relpath(local_path, directory_path)
      print(relative_path)
      object_key = os.path.join(s3_prefix, relative_path).replace('\\', '/')
      s3.upload_file(local_path, bucket_name, object_key)

In [9]:
# Target bucket (the same name assigned earlier in the notebook) and the local
# model directory that is to be uploaded.
bucket_name = "tinybertsentimentanalysis"
directory_path = "tinybert_sentiment_analysis"


In [10]:
import os

In [11]:
%pwd

'c:\\Users\\bored\\Music\\Model_Deployment'

In [12]:
# Upload the saved model directory under the 'ml-models' key prefix.
upload_directory(directory_path=directory_path, s3_prefix='ml-models')

config.json
model.safetensors
special_tokens_map.json
tokenizer.json
tokenizer_config.json
vocab.txt
