<a href="https://colab.research.google.com/github/Raghuraj-stack/Sentiment-Analysis-of-Comments-through-e-Consultation-Module/blob/main/SentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ijson




In [None]:
import ijson
import pandas as pd

records = []

with open("CUADv1.json", "r", encoding="utf-8") as f:

    objects = ijson.items(f, 'data.item')
    for contract in objects:
        for para in contract.get("paragraphs", []):
            context = para.get("context", "")
            for qa in para.get("qas", []):
                question = qa.get("question", "")
                answers = [a.get("text", "") for a in qa.get("answers", [])]
                answer = answers[0] if answers else ""

                records.append({
                    "contract_text": context,
                    "question": question,
                    "answer": answer
                })

df_cuad = pd.DataFrame(records)
print("CUAD loaded with shape:", df_cuad.shape)
print(df_cuad.head())


CUAD loaded with shape: (20910, 3)
                                       contract_text  \
0  EXHIBIT 10.6\n\n                              ...   
1  EXHIBIT 10.6\n\n                              ...   
2  EXHIBIT 10.6\n\n                              ...   
3  EXHIBIT 10.6\n\n                              ...   
4  EXHIBIT 10.6\n\n                              ...   

                                            question  \
0  Highlight the parts (if any) of this contract ...   
1  Highlight the parts (if any) of this contract ...   
2  Highlight the parts (if any) of this contract ...   
3  Highlight the parts (if any) of this contract ...   
4  Highlight the parts (if any) of this contract ...   

                                              answer  
0                              DISTRIBUTOR AGREEMENT  
1                                        Distributor  
2                        7th day of September, 1999.  
3  The term of this  Agreement  shall be ten (10)...  
4  The term of t

In [None]:
def classify_intent(text):
    text = text.lower()
    if any(word in text for word in ["issue", "problem", "unfair", "concern"]):
        return "Complaint"
    elif any(word in text for word in ["should", "recommend", "suggest", "better"]):
        return "Suggestion"
    elif any(word in text for word in ["agree", "support", "approve", "endorse"]):
        return "Approval"
    elif any(word in text for word in ["why", "how", "when", "what if"]):
        return "Query"
    else:
        return "Other"


df_cuad["intent"] = df_cuad["question"].apply(classify_intent)

print(df_cuad[["question", "intent"]].head(10))


                                            question      intent
0  Highlight the parts (if any) of this contract ...  Suggestion
1  Highlight the parts (if any) of this contract ...  Suggestion
2  Highlight the parts (if any) of this contract ...  Suggestion
3  Highlight the parts (if any) of this contract ...  Suggestion
4  Highlight the parts (if any) of this contract ...  Suggestion
5  Highlight the parts (if any) of this contract ...  Suggestion
6  Highlight the parts (if any) of this contract ...  Suggestion
7  Highlight the parts (if any) of this contract ...  Suggestion
8  Highlight the parts (if any) of this contract ...  Suggestion
9  Highlight the parts (if any) of this contract ...  Suggestion


In [None]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
try:
    stopwords.words('english')
except LookupError:
    nltk.download('stopwords')
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    nltk.download('punkt_tab')


df_sigma = pd.read_csv("SigmaLaw-ABSA.csv")
df_new = pd.read_csv("legal_text_classification.csv")


df_sigma_std = df_sigma.rename(columns={
    'Sentence': 'text',
    'Overall Sentiment': 'sentiment'
})
df_new_std = df_new.rename(columns={
    'case_text': 'text',
    'case_outcome': 'sentiment'
})
df_sigma_std = df_sigma_std[['text', 'sentiment']]
df_new_std = df_new_std[['text', 'sentiment']]
print(" Columns standardized successfully.")


stop_words = set(stopwords.words('english'))
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    words = word_tokenize(text)
    filtered_words = [word for word in words if word not in stop_words]
    return " ".join(filtered_words)

df_sigma_std['text'] = df_sigma_std['text'].apply(clean_text)
df_new_std['text'] = df_new_std['text'].apply(clean_text)
print("Text cleaning applied.")


df_final = pd.concat([df_sigma_std, df_new_std], ignore_index=True)


def map_sentiment(value):
    try:

        numeric_value = int(value)
        if numeric_value == 0:
            return 'Neutral'
        elif numeric_value == -1:
            return 'Negative'
        elif numeric_value == 1:
            return 'Positive'
        else:
            return str(value)
    except (ValueError, TypeError):

        return str(value)

df_final['sentiment'] = df_final['sentiment'].apply(map_sentiment)
print("Numeric sentiments mapped to text labels.")


df_final.dropna(subset=['text'], inplace=True)
df_final = df_final[df_final['text'].str.strip() != '']
df_final.to_csv("sentiment_data_preprocessed.csv", index=False)

print("\nPreprocessing is complete.")
print(f"Total rows in the final dataset: {len(df_final)}")
print("File saved as 'sentiment_data_preprocessed.csv'")
print("\nFinal Data Preview with Text Labels:")
print(df_final.head())
print(df_final.tail())

 Columns standardized successfully.
Text cleaning applied.
Numeric sentiments mapped to text labels.

Preprocessing is complete.
Total rows in the final dataset: 26982
File saved as 'sentiment_data_preprocessed.csv'

Final Data Preview with Text Labels:
                                                text sentiment
0  petitioner jae lee moved united states south k...   Neutral
1  years spent country never returned south korea...   Neutral
2     years spent country never returned south korea   Neutral
3                                years spent country   Neutral
4  federal officials received tip confidential in...  Negative
                                                    text      sentiment
26980  confined persons control company extends benef...          cited
26981  threshold prescribed satisfied discretion whet...          cited
26982  threshold prescribed satisfied discretion whet...          cited
26983  given extent deumer stands gain proceedings su...  distinguished
26984  v

In [None]:

sentiment_counts = df_final['sentiment'].value_counts()
print(sentiment_counts)

In [None]:

sentiment_labels = ['Positive', 'Negative', 'Neutral']


df_sentiments = df_final[df_final['sentiment'].isin(sentiment_labels)]


df_intents = df_final[~df_final['sentiment'].isin(sentiment_labels)]



print("Data has been separated.")

print("\n--- Sentiment Dataset ---")
print(f"Shape: {df_sentiments.shape}")
print("Labels:", df_sentiments['sentiment'].unique().tolist())
print(df_sentiments.head(10))

print("\n--- Intent Dataset ---")
print(f"Shape: {df_intents.shape}")
print("Labels:", df_intents['sentiment'].unique().tolist())
print(df_intents.head(10))



Data has been separated.

--- Sentiment Dataset ---
Shape: (1997, 2)
Labels: ['Neutral', 'Negative', 'Positive']
                                                text sentiment
0  petitioner jae lee moved united states south k...   Neutral
1  years spent country never returned south korea...   Neutral
2     years spent country never returned south korea   Neutral
3                                years spent country   Neutral
4  federal officials received tip confidential in...  Negative
5  federal officials received tip confidential in...  Positive
6               lee sold informant ecstasy marijuana  Negative
7  obtaining warrant officials searched lees hous...  Negative
8                      officials searched lees house   Neutral
9                      found drugs cash loaded rifle  Positive

--- Intent Dataset ---
Shape: (24985, 2)
Labels: ['cited', 'applied', 'followed', 'referred to', 'related', 'considered', 'discussed', 'distinguished', 'affirmed', 'approved']
                 

In [None]:
GIT_USERNAME = "Raghuraj-stack"
GIT_TOKEN = "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"
GIT_REPO = "Sentiment-Analysis-of-Comments-through-e-Consultation-Module"
!git clone https://{GIT_USERNAME}:{GIT_TOKEN}@github.com/{GIT_USERNAME}/{GIT_REPO}.git
!mv sentiment_data_preprocessed.csv {GIT_REPO}/
%cd {GIT_REPO}
!git config --global user.name "<Raghuraj-stack>"
!git config --global user.email "<studyweb111@gmail.com>"
!git add sentiment_data_preprocessed.csv
!git commit -m "Add preprocessed sentiment and intent dataset"
!git push

Cloning into 'Sentiment-Analysis-of-Comments-through-e-Consultation-Module'...
remote: Enumerating objects: 9, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 9 (delta 0), reused 3 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (9/9), 7.92 MiB | 11.17 MiB/s, done.
/content/Sentiment-Analysis-of-Comments-through-e-Consultation-Module/Sentiment-Analysis-of-Comments-through-e-Consultation-Module
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
Everything up-to-date


In [1]:
import pandas as pd

# 1. Load the main preprocessed file you created.
# Ensure the filename here exactly matches the one you saved.
df_main = pd.read_csv("sentiment_data_preprocessed.csv")

# 2. Separate the data into sentiments and intents, just as we did before.
# Define which labels are considered sentiments.
sentiment_labels = ['Positive', 'Negative', 'Neutral']

# Create the final DataFrame for our sentiment analysis task.
df_sentiments = df_main[df_main['sentiment'].isin(sentiment_labels)].copy()

# --- Verification ---
print("Sentiment-only data is ready for analysis!")
print(f"Shape: {df_sentiments.shape}")
print(df_sentiments.head())

Sentiment-only data is ready for analysis!
Shape: (1997, 2)
                                                text sentiment
0  petitioner jae lee moved united states south k...   Neutral
1  years spent country never returned south korea...   Neutral
2     years spent country never returned south korea   Neutral
3                                years spent country   Neutral
4  federal officials received tip confidential in...  Negative


In [2]:
# 1. Install necessary libraries


#sentiment classification using - 3-label sentiment model




# We add 'accelerate' which can help speed up the model.
!pip install transformers[torch] accelerate

import pandas as pd
from transformers import pipeline
from sklearn.metrics import classification_report

# 2. Load and prepare your sentiment data
# This ensures we are starting from a clean slate.
df_main = pd.read_csv("sentiment_data_preprocessed.csv")
sentiment_labels = ['Positive', 'Negative', 'Neutral']
df_sentiments = df_main[df_main['sentiment'].isin(sentiment_labels)].copy()


# 3. Load the 3-label sentiment analysis model
print("Loading the sentiment model (this might take a moment)...")
# This model is specifically trained for Positive, Negative, and Neutral sentiment.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest"
)
print("Model loaded successfully!")


# 4. Prepare data and run predictions
texts_to_analyze = df_sentiments['text'].tolist()
actual_labels = df_sentiments['sentiment'].tolist()

print("Running predictions on the dataset...")
predictions = sentiment_pipeline(texts_to_analyze)
print("Predictions complete!")


# 5. Process the results
# The model's output labels are lowercase, so we capitalize them to match your data.
predicted_labels = [pred['label'].capitalize() for pred in predictions]

# Add the new predictions to our DataFrame for comparison
df_sentiments['predicted_sentiment'] = predicted_labels


# 6. Evaluate the model's performance
print("\n--- Model Performance Report ---")
# This report shows how accurately the model predicted each category.
print(classification_report(actual_labels, predicted_labels))


# 7. View the results
print("\n--- Data with Predictions ---")
print(df_sentiments[['text', 'sentiment', 'predicted_sentiment']].head(10))

Loading the sentiment model (this might take a moment)...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu


Model loaded successfully!
Running predictions on the dataset...
Predictions complete!

--- Model Performance Report ---
              precision    recall  f1-score   support

    Negative       0.73      0.38      0.50       977
     Neutral       0.23      0.91      0.37       378
    Positive       0.80      0.01      0.02       642

    accuracy                           0.36      1997
   macro avg       0.59      0.43      0.30      1997
weighted avg       0.66      0.36      0.32      1997


--- Data with Predictions ---
                                                text sentiment  \
0  petitioner jae lee moved united states south k...   Neutral   
1  years spent country never returned south korea...   Neutral   
2     years spent country never returned south korea   Neutral   
3                                years spent country   Neutral   
4  federal officials received tip confidential in...  Negative   
5  federal officials received tip confidential in...  Positive   
6    

In [5]:
# 1. Install necessary libraries
!pip install transformers[torch] accelerate datasets scikit-learn

import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
from datasets import Dataset

# 2. Load and prepare your intent-specific data
df_main = pd.read_csv("sentiment_data_preprocessed.csv")
sentiment_labels = ['Positive', 'Negative', 'Neutral']
df_intents = df_main[~df_main['sentiment'].isin(sentiment_labels)].copy()
df_intents.dropna(subset=['text', 'sentiment'], inplace=True)
df_intents = df_intents[df_intents['text'].apply(lambda x: isinstance(x, str))]
print("Data is clean and ready.")

# 3. Create label mappings (string to number and number to string)
unique_labels = df_intents['sentiment'].unique().tolist()
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for i, label in enumerate(unique_labels)}

# Create a new column 'label' with the integer IDs
df_intents['label'] = df_intents['sentiment'].map(label2id)
print("Created label mappings.")

# 4. Split the data into training and testing sets
train_df, test_df = train_test_split(
    df_intents,
    test_size=0.2,       # Use 20% of the data for testing
    random_state=42,     # Ensures the split is the same every time
    stratify=df_intents['label'] # Ensures both sets have a similar distribution of labels
)

print(f"Training set size: {len(train_df)}")
print(f"Testing set size: {len(test_df)}")

# 5. Load the tokenizer
# We'll use 'distilbert-base-uncased', a fast and effective model for fine-tuning.
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# 6. Create a tokenization function
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

# 7. Convert pandas DataFrames to Hugging Face Dataset objects
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

# 8. Apply the tokenization to the entire datasets
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

print("\nData tokenization complete!")
print("Your datasets are now ready for training.")

Data is clean and ready.
Created label mappings.
Training set size: 19847
Testing set size: 4962


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/19847 [00:00<?, ? examples/s]

Map:   0%|          | 0/4962 [00:00<?, ? examples/s]


Data tokenization complete!
Your datasets are now ready for training.


In [1]:
# 1. Install necessary libraries
!pip install transformers[torch] accelerate datasets scikit-learn

import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
from datasets import Dataset

# --- Part A: Load Data and Create Label Mappings ---

# Load and prepare your intent-specific data
df_main = pd.read_csv("sentiment_data_preprocessed.csv")
sentiment_labels = ['Positive', 'Negative', 'Neutral']
df_intents = df_main[~df_main['sentiment'].isin(sentiment_labels)].copy()
df_intents.dropna(subset=['text', 'sentiment'], inplace=True)
df_intents = df_intents[df_intents['text'].apply(lambda x: isinstance(x, str))]
print("Data is clean and ready.")

# Create the "secret code" to map string labels to numbers (and back)
unique_labels = df_intents['sentiment'].unique().tolist()
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for i, label in enumerate(unique_labels)}

# Create a new column 'label' with the number codes
df_intents['label'] = df_intents['sentiment'].map(label2id)
print("Created label mappings.")


# --- Part B: Split Data into Training and Testing Sets ---

# Split the data: 80% for the "textbook" (training) and 20% for the "final exam" (testing)
train_df, test_df = train_test_split(
    df_intents,
    test_size=0.2,
    random_state=42,
    stratify=df_intents['label']
)
print(f"Training set size: {len(train_df)}")
print(f"Testing set size: {len(test_df)}")


# --- Part C: Load Tokenizer and Prepare Datasets ---

# Load the tokenizer that knows how to convert words to number codes for our model
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Define the function that will run the tokenizer on our text
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

# Convert our pandas tables into a special format called a Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

# Run the tokenizer on both datasets
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

print("\nData tokenization complete!")
print("Your datasets are now ready for the main training step.")

Data is clean and ready.
Created label mappings.
Training set size: 11592
Testing set size: 2898


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/11592 [00:00<?, ? examples/s]

Map:   0%|          | 0/2898 [00:00<?, ? examples/s]


Data tokenization complete!
Your datasets are now ready for the main training step.


In [4]:
# 1. Install necessary libraries
!pip install evaluate

import numpy as np
import evaluate
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import os

# Disable WANDB logging
os.environ["WANDB_DISABLED"] = "true"


# 2. Load the base model
# This loads the 'distilbert' model and configures it for our specific number of labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(unique_labels),
    id2label=id2label,
    label2id=label2id,
)

# 3. Define the training rules (Training Arguments)
# These are the settings for our training session.
training_args = TrainingArguments(
    output_dir="my_intent_model",             # Directory to save the trained model
    learning_rate=2e-5,                        # A standard learning rate for fine-tuning
    per_device_train_batch_size=16,            # Number of examples per batch for training
    per_device_eval_batch_size=16,             # Number of examples per batch for evaluation
    num_train_epochs=3,                        # We'll train the model for 3 full passes over the data
    weight_decay=0.01,                         # A standard value for regularization
    eval_strategy="epoch",               # Evaluate the model at the end of each epoch
    save_strategy="epoch",                     # Save the model at the end of each epoch
    load_best_model_at_end=True,               # Load the best performing model at the end
)

# 4. Define how to calculate metrics
# This function will calculate the accuracy of our model during evaluation.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# 5. Create the Trainer
# The Trainer is the main tool that handles the entire training process.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# 6. Start the training!
print("Starting the fine-tuning process...")
trainer.train()
print("Training complete!")



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Starting the fine-tuning process...


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mstudyweb111[0m ([33mstudyweb111-github[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,1.5889,1.52315,0.488268
2,1.4957,1.458361,0.504141
3,1.3484,1.444,0.513112


Training complete!


In [7]:
# This command creates a zip file from your model folder
!zip -r /content/my_intent_model.zip /content/my_intent_model

  adding: content/my_intent_model/ (stored 0%)
  adding: content/my_intent_model/checkpoint-2175/ (stored 0%)
  adding: content/my_intent_model/checkpoint-2175/scheduler.pt (deflated 62%)
  adding: content/my_intent_model/checkpoint-2175/training_args.bin (deflated 53%)
  adding: content/my_intent_model/checkpoint-2175/optimizer.pt (deflated 22%)
  adding: content/my_intent_model/checkpoint-2175/rng_state.pth (deflated 26%)
  adding: content/my_intent_model/checkpoint-2175/tokenizer.json (deflated 71%)
  adding: content/my_intent_model/checkpoint-2175/trainer_state.json (deflated 66%)
  adding: content/my_intent_model/checkpoint-2175/config.json (deflated 53%)
  adding: content/my_intent_model/checkpoint-2175/model.safetensors (deflated 8%)
  adding: content/my_intent_model/checkpoint-2175/special_tokens_map.json (deflated 42%)
  adding: content/my_intent_model/checkpoint-2175/tokenizer_config.json (deflated 75%)
  adding: content/my_intent_model/checkpoint-2175/vocab.txt (deflated 53%

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# This command copies all files and folders recursively
# from the /content/ directory to a new folder in your Drive.
!cp -r /content/* "/content/drive/MyDrive/Colab_Project_Backup/"

cp: target '/content/drive/MyDrive/Colab_Project_Backup/' is not a directory


In [11]:
# Navigate into your repository's directory
# Replace <YOUR_REPOSITORY_NAME> with your actual repo name
%cd /content/Sentiment-Analysis-of-Comments-through-e-Consultation-Module/

# Add all new and modified files to be tracked by Git
!git add .

# Commit the changes with a descriptive message
!git commit -m "Complete and save fine-tuned intent classification model"

# Push all the committed changes to your GitHub repository
!git push

[Errno 2] No such file or directory: '/content/Sentiment-Analysis-of-Comments-through-e-Consultation-Module/'
/content
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git


In [12]:
# First, make sure you are in the correct directory
%cd /content/Sentiment-Analysis-of-Comments-through-e-Consultation-Module/

# Now, check the status
!git status

[Errno 2] No such file or directory: '/content/Sentiment-Analysis-of-Comments-through-e-Consultation-Module/'
/content
fatal: not a git repository (or any of the parent directories): .git


In [13]:
# Step 1: Check your current location
print("--- Checking current directory ---")
!pwd

# Step 2: List all folders and files in the current location
# You should see your repository folder name listed here.
print("\n--- Listing available folders ---")
!ls

# Step 3: Change into your repository folder
# Make sure the repository name is correct!
print("\n--- Moving into the correct directory ---")
%cd /content/Sentiment-Analysis-of-Comments-through-e-Consultation-Module/

# Step 4: Now, try to add, commit, and push again
print("\n--- Retrying the push to GitHub ---")
!git add .
!git commit -m "Final version with trained models"
!git push

--- Checking current directory ---
/content

--- Listing available folders ---
CUADv1.json		       sample_data
drive			       sentiment_data_preprocessed.csv
legal_text_classification.csv  SigmaLaw-ABSA.csv
my_intent_model		       wandb
my_intent_model.zip

--- Moving into the correct directory ---
[Errno 2] No such file or directory: '/content/Sentiment-Analysis-of-Comments-through-e-Consultation-Module/'
/content

--- Retrying the push to GitHub ---
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git


In [14]:
# --- Part A: Re-Clone the Repository ---
# Replace these with your actual details
GIT_USERNAME = "<Raghuraj-stack>"
GIT_TOKEN = "<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>"
GIT_REPO = "<Sentiment-Analysis-of-Comments-through-e-Consultation-Module>"

print("Cloning your repository from GitHub...")
!git clone https://{GIT_USERNAME}:{GIT_TOKEN}@github.com/{GIT_USERNAME}/{GIT_REPO}.git


# --- Part B: Copy Your Work into the Repository ---
print("\nCopying your saved model and data into the repo...")
# Copy the saved model folder
!cp -r /content/my_intent_model "{GIT_REPO}/"
# Copy the preprocessed data file
!cp /content/sentiment_data_preprocessed.csv "{GIT_REPO}/"


# --- Part C: Push Everything to GitHub ---
print("\nPreparing to push to GitHub...")
# Navigate into the repository folder
%cd {GIT_REPO}

# Configure Git with your name and email
!git config --global user.name "<Raghuraj-stack>"
!git config --global user.email "<studyweb111@gmail.com>"

# Add, commit, and push all files
!git add .
!git commit -m "Add final fine-tuned model and preprocessed data"
!git push

Cloning your repository from GitHub...
/bin/bash: line 1: Raghuraj-stack: No such file or directory

Copying your saved model and data into the repo...

Preparing to push to GitHub...
/content/<Sentiment-Analysis-of-Comments-through-e-Consultation-Module>
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
fatal: not a git repository (or any of the parent directories): .git
