In [1]:
import torch



In [2]:
import pandas as pd

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [4]:
from transformers import pipeline

In [5]:
from tqdm import tqdm

In [6]:
# Register tqdm's pandas integration so that `progress_apply` (used in the
# classification cells further down) is available on pandas objects.
tqdm.pandas()

In [7]:
# Zero-shot classifier shared by the helper functions below.
# - The task is spelled out instead of being inferred from the model's hub
#   metadata, so the notebook states what kind of pipeline it relies on.
# - The model is placed on the first CUDA GPU when one is available and falls
#   back to CPU (-1) otherwise; on CPU the recorded run took ~15 s per post.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=0 if torch.cuda.is_available() else -1,
)

In [8]:
import re

In [9]:
def clean_text(text):
    """Lower-case ``text`` and strip URLs, @mentions, #hashtags and punctuation.

    The removals are applied in a fixed order. Punctuation stripping runs
    last because it would otherwise delete the ``@`` / ``#`` markers before
    the mention and hashtag patterns had a chance to match.

    Whitespace is left as-is, so removed tokens can leave double spaces, and
    words joined by punctuation are fused (``r/petioles`` -> ``rpetioles``).

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lower-cased string.
    """
    removal_patterns = (
        r'http\S+',   # URLs
        r'@\w+',      # @mentions
        r'#\w+',      # hashtags
        r'[^\w\s]',   # anything that is neither a word character nor whitespace
    )
    cleaned = text.lower()
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned

In [10]:
def classify_and_extract_scores(text, input_labels):
    """Score ``text`` against every candidate label with ``multi_label=True``.

    Calls the module-level ``classifier`` and pairs each entry of the
    result's ``'labels'`` list with the entry at the same position in its
    ``'scores'`` list.

    Args:
        text: The text to classify.
        input_labels: Candidate labels handed to the classifier.

    Returns:
        dict mapping each returned label to its score.
    """
    prediction = classifier(text, input_labels, multi_label=True)
    return dict(zip(prediction['labels'], prediction['scores']))

In [11]:
def classify_and_extract_confidence(text, input_labels):
    """Score ``text`` against the candidate labels with ``multi_label=False``.

    Identical to ``classify_and_extract_scores`` except for the
    ``multi_label`` flag passed to the module-level ``classifier``.
    NOTE(review): with ``multi_label=False`` the pipeline presumably treats
    the labels as mutually exclusive -- confirm against the transformers docs.

    Args:
        text: The text to classify.
        input_labels: Candidate labels handed to the classifier.

    Returns:
        dict mapping each returned label to its score.
    """
    prediction = classifier(text, input_labels, multi_label=False)
    return dict(zip(prediction['labels'], prediction['scores']))

In [12]:
# Load the post dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('Reddit_Post_Cleaned.csv')

In [13]:
# Inspect the column names of the loaded frame.
df.columns

Index(['Username', 'PostTitle', 'Date', 'Full_Post', 'PostURL', 'Upvotes',
       'WordCount', 'Cleaned_Full_Post'],
      dtype='object')

In [14]:
# Count missing values per column.
df.isna().sum()

Username             0
PostTitle            0
Date                 0
Full_Post            0
PostURL              0
Upvotes              0
WordCount            0
Cleaned_Full_Post    0
dtype: int64

In [15]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Username,PostTitle,Date,Full_Post,PostURL,Upvotes,WordCount,Cleaned_Full_Post
0,u/sc1234,Fibonacci sequence approach,2012-12-11,Thanks to JoshTay for setting this up. Here's ...,http://www.reddit.com/r/Petioles/comments/14ow...,55,243,Thanks JoshTay setting . 's comment made earli...
1,u/[deleted],Please help.,2012-12-12,Alright i just want to start right off the bat...,http://www.reddit.com/r/Petioles/comments/14r4...,0,129,Alright want start right bat saying understand...
2,u/IVtheGREEN,Everything is easy in thought.,2012-12-12,I have no desire to stop smoking. I am a 17 ye...,http://www.reddit.com/r/Petioles/comments/14r4...,8,129,desire stop smoking . 17 year old honor societ...
3,u/Cog-Rat,Balance Makes Perfect,2012-12-12,Iv'e found that when I rush home and pack my f...,http://www.reddit.com/r/Petioles/comments/14r3...,38,192,Iv ' e found rush home pack first bowl seconds...
4,u/BitJuNkiE,Highschool conflicts,2012-12-12,"Hey r/petioles, I have some questions I would ...",http://www.reddit.com/r/Petioles/comments/14qt...,1,74,"Hey r/petioles , questions would like second o..."


In [16]:
# Pin the working frame to the expected columns, in this order.
# A missing column raises KeyError here rather than failing later.
df = df[
    [
        'Username',
        'PostTitle',
        'Date',
        'Full_Post',
        'PostURL',
        'Upvotes',
        'WordCount',
        'Cleaned_Full_Post',
    ]
]

In [17]:
# Title and body joined by a single space; this is the text that gets classified.
df['full_text'] = df['PostTitle'].str.cat(df['Full_Post'], sep=' ')

In [18]:
# Whitespace-delimited token count of title + body (the existing `WordCount`
# column is kept untouched alongside it).
df['word_counts'] = df['full_text'].map(lambda post: len(post.split()))

In [19]:
# Normalised copy of the text (lower-cased, URLs/mentions/hashtags/punctuation removed).
# NOTE(review): the classification cells below read `full_text`, not this column -- confirm intent.
df['cleaned_text'] = df['full_text'].map(clean_text)

In [20]:
# Display the frame with the derived full_text / word_counts / cleaned_text columns.
df

Unnamed: 0,Username,PostTitle,Date,Full_Post,PostURL,Upvotes,WordCount,Cleaned_Full_Post,full_text,word_counts,cleaned_text
0,u/sc1234,Fibonacci sequence approach,2012-12-11,Thanks to JoshTay for setting this up. Here's ...,http://www.reddit.com/r/Petioles/comments/14ow...,55,243,Thanks JoshTay setting . 's comment made earli...,Fibonacci sequence approach Thanks to JoshTay ...,246,fibonacci sequence approach thanks to joshtay ...
1,u/[deleted],Please help.,2012-12-12,Alright i just want to start right off the bat...,http://www.reddit.com/r/Petioles/comments/14r4...,0,129,Alright want start right bat saying understand...,Please help. Alright i just want to start righ...,131,please help alright i just want to start right...
2,u/IVtheGREEN,Everything is easy in thought.,2012-12-12,I have no desire to stop smoking. I am a 17 ye...,http://www.reddit.com/r/Petioles/comments/14r4...,8,129,desire stop smoking . 17 year old honor societ...,Everything is easy in thought. I have no desir...,134,everything is easy in thought i have no desire...
3,u/Cog-Rat,Balance Makes Perfect,2012-12-12,Iv'e found that when I rush home and pack my f...,http://www.reddit.com/r/Petioles/comments/14r3...,38,192,Iv ' e found rush home pack first bowl seconds...,Balance Makes Perfect Iv'e found that when I r...,195,balance makes perfect ive found that when i ru...
4,u/BitJuNkiE,Highschool conflicts,2012-12-12,"Hey r/petioles, I have some questions I would ...",http://www.reddit.com/r/Petioles/comments/14qt...,1,74,"Hey r/petioles , questions would like second o...","Highschool conflicts Hey r/petioles, I have so...",76,highschool conflicts hey rpetioles i have some...
...,...,...,...,...,...,...,...,...,...,...,...
17005,u/KaKapowpow,What are your favorite tools for moderation?,2022-12-31,I have noticed that my smoking has become more...,https://www.reddit.com/r/Petioles/comments/100...,31,264,noticed smoking become habit anything 'm const...,What are your favorite tools for moderation? I...,271,what are your favorite tools for moderation i ...
17006,u/TimelyInspection4129,Edibles,2022-12-31,I’m here at 4 weeks with no joint/blunts! Howe...,https://www.reddit.com/r/Petioles/comments/100...,15,73,"’ 4 weeks joint/blunts ! However , ’ stuck 10m...",Edibles I’m here at 4 weeks with no joint/blun...,74,edibles im here at 4 weeks with no jointblunts...
17007,u/VampireNugz,How a year of excess weed changed me for the b...,2022-12-31,Worked at a dispensary and made it a goal to s...,https://www.reddit.com/r/Petioles/comments/100...,284,651,Worked dispensary made goal smoke much wanted ...,How a year of excess weed changed me for the b...,664,how a year of excess weed changed me for the b...
17008,u/throwawayadvice102,How did CBD help you with THC withdrawals and ...,2022-12-31,How did CBD help you with THC withdrawals and ...,https://www.reddit.com/r/Petioles/comments/100...,7,273,CBD help THC withdrawals months following ? 'm...,How did CBD help you with THC withdrawals and ...,293,how did cbd help you with thc withdrawals and ...


In [21]:
# Keep only posts whose title + body is at most 600 words.
# NOTE(review): the 600-word cut-off is presumably there to keep inputs within
# the model's input limit -- confirm.
# `.copy()` makes `filtered_df` an independent frame: later cells add columns
# to it, and writing into a boolean-mask slice of `df` would otherwise trigger
# pandas' SettingWithCopyWarning.
filtered_df = df[df['word_counts'] <= 600].copy()

In [22]:
# Candidate labels for the consumption-reason classification.
# The exact wording matters: each string is passed to the classifier as a
# candidate label and becomes a key of the per-post score dict.
input_labels = [
    'Consumption for Recreational Use such as enjoyment and relaxation',
    'Consumption due to Social Influence such as peer pressure or social group',
    'Consumption for Curiosity experimentation or initial trial',
    'Consumption for Sleep Improvement',
    'Consumption for Stress Relief',
    'Consumption for Medical Use including Pain relief anxiety depression management',
    'Consumption reason not mentioned',
]

In [23]:
# Repeats the earlier `tqdm.pandas()` registration ahead of the `progress_apply` calls below.
tqdm.pandas()

In [None]:
# Expensive step: one classifier call per post (the recorded run shows ~15 s/post).
# Each cell of the new column holds a {label: score} dict for the labels
# currently bound to `input_labels`.
filtered_df.loc[:, 'confidence_consumption_reasons_scores'] = (
    filtered_df['full_text']
    .progress_apply(lambda post: classify_and_extract_scores(post, input_labels))
)

  2%|▊                                 | 394/16419 [5:34:15<69:20:46, 15.58s/it]

In [None]:
# Display the filtered frame after the consumption-reason scores column was added.
filtered_df

In [None]:
# Candidate labels for the quitting-outcome classification.
# This rebinds `input_labels`: any cell that reads it afterwards (including a
# re-run of the consumption-reason cell) will see these labels instead.
# The wording matters because each string is passed to the classifier as a
# candidate label and becomes a key of the per-post score dict; the earlier
# "stooping" typo in the second label is corrected to "stopping".
input_labels = [
    'Effective reduction in consumption by maintaining lower use',
    'Complete cessation or stopping the use of cannabis',
    'Returning to previous levels of use after attempting to quit',
    'Continuous effort with varying success and challenges',
    'Temporary cessation followed by periods of use',
    'Desire to change without actual modification in habits',
    'No outcome mentioned',
]

In [None]:
# Same per-post scoring as for consumption reasons, but against whatever
# `input_labels` holds now -- this cell must run after the quitting-outcome
# labels have been assigned.
filtered_df.loc[:, 'confidence_quitting_reasons_scores'] = (
    filtered_df['full_text']
    .progress_apply(lambda post: classify_and_extract_scores(post, input_labels))
)

In [None]:
# Display the filtered frame after the quitting-outcome scores column was added.
filtered_df