In [1]:
import pandas as pd
import pandas as pd
from tqdm import tqdm
import torch
from torch.utils.data import Dataset
import random

from transformers import RobertaTokenizer, T5ForConditionalGeneration, DataCollatorWithPadding

In [2]:
path_raw = '/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/raw/python_QA_text_code_raw.csv'
path_code = '/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/processed/code_question_df.csv'
path_described_code = '/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/processed/code_question_described_df.csv'
question_with_code_description_path = '/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/processed/question_with_code_description.csv'

In [3]:
class CodeDataset(Dataset):
    def __init__(self, code_question_df, tokenizer):
        self.code_snippets = code_question_df['code_snippet'].tolist()
        self.indices = torch.arange(len(self.code_snippets))
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.code_snippets)

    def __getitem__(self, index):
        code_snippet = self.code_snippets[index]
        encoded_input = self.tokenizer.encode_plus(
            code_snippet,
            padding=True,
            truncation=True,
            return_tensors="pt"
        )
        input_ids = encoded_input["input_ids"]
        return {"input_ids": input_ids[0], "index": index}

In [4]:
class DataPreprocessing:
    def __init__(self, df=None, columns=['Id_Q', 'Title_Q', 'Body_Q', 'Tags_Q', 'Code_Q']):
        self.df = df[columns].drop_duplicates()
    
    def preprocess_tags(self):
        self.df['Tags_Q'] = self.df['Tags_Q'].apply(lambda x: x.split(','))
    
    def indexing_code(self):
        self.code_df = self.df[self.df['Code_Q']][['Id_Q']]
    
    def get_code_question_dataframe(self):
        code_question_data = []
        start_tag = '<pre><code>'
        end_tag = '</code></pre>'
        
        for index, row in tqdm(self.df.iterrows()):
            if row['Id_Q'] in self.code_df['Id_Q'].values and pd.notnull(row['Body_Q']):
                text = row['Body_Q']
                start_index = text.find(start_tag)
                pos_code = 0
                while start_index != -1:
                    end_index = text.find(end_tag, start_index)
                    
                    if end_index != -1:
                        code_snippet = text[start_index + len(start_tag):end_index]
                        code_question_data.append({'code_id': pos_code, 'code_snippet': code_snippet,'question_id': row['Id_Q']})
                        pos_code += 1
                        start_index = text.find(start_tag, end_index)
                    else:
                        break
        code_question_df = pd.DataFrame(code_question_data)
        return code_question_df
    
    def generate_code_descriptions(self, tokenizer, model, code_question_df, batch_size=64):
        code_question_df['code_description'] = ''
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)

        dataset = CodeDataset(code_question_df, tokenizer)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, collate_fn=DataCollatorWithPadding(tokenizer, padding=True))

        for batch in tqdm(dataloader):
            #batch_indices = batch_indices.tolist()

            with torch.no_grad():
                generated_ids = model.generate(batch["input_ids"].to(device), max_length=20)

            descriptions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
            
            for i, description in enumerate(descriptions):
                index = int(batch['index'][i])
                code_question_df.loc[index, 'code_description'] = str(description)

        return code_question_df

    def substitute_code_with_description(self, code_df):
        for row in tqdm(code_df.itertuples()):
            code_id = row.code_id
            question_id = row.question_id
            description = row.code_description
            code_snip = row.code_snippet
            
            # Find the corresponding question based on question ID
            
            try:
                question = self.df.loc[self.df['Id_Q'] == question_id, 'Body_Q'].values[0]
            except:
                print(self.df.loc[self.df['Id_Q'] == question_id, 'Body_Q'])
                print(self.df.loc[self.df['Id_Q'] == question_id, 'Body_Q'].values[0])
            
            # Replace the code with its description in the question
            question = question.replace(f'<pre><code>{code_snip}</code></pre>', f'code description start: {description} code description end')
            
            # Update the question in the question DataFrame
            self.df.loc[self.df['Id_Q'] == question_id, 'Body_Q'] = question
        
        return self.df

In [6]:
df = pd.read_csv(path_raw, index_col=0)
df = df.sample(100)

In [7]:
df

Unnamed: 0,Id_Q,AcceptedAnswerId_Q,CreationDate_Q,Score_Q,ViewCount_Q,Body_Q,Title_Q,Tags_Q,AnswerCount_Q,CommentCount_Q,Link_Q,Code_Q,Image_Q,Id_A,CreationDate_A,Score_A,Body_A,Link_A,Code_A,Image_A
82770,6841853,6842138.0,2011-07-27 09:05:31.300000,20,24479,"<p>I'm currently learning Python, and I have t...","accessing ""module scope"" vars","python,variables,module,scope,coding-style",4,1,False,True,False,6842102,2011-07-27 09:29:17.003000,1,<p>Avoid setting globals at all. You can creat...,False,False,False
124453,24689331,,2014-07-11 02:47:27.143000,0,1675,"<p>I'm a beginner to flask, following the inst...",no module named flask,"python,flask",2,3,False,True,False,24689931,2014-07-11 04:04:28.980000,0,<h1>Virtualenv is always activated locally for...,False,False,False
255427,65341580,,2020-12-17 13:23:06.773000,0,65,<p>I want to kick members that have a specific...,discord py - How do I check if a member has a ...,"python,python-3.x,discord.py,discord.py-rewrite",1,2,False,True,False,65343785,2020-12-17 15:39:05.717000,0,<p>Even though I do not believe Discord saves ...,False,False,False
11502,4180836,4180882.0,2010-11-15 01:46:21.097,19,9169,<p><strong>How practical would it be to use Cy...,Using Cython for game development?,"python,c,cython",6,2,False,False,False,17047294,2013-06-11 14:57:30.433,0,<p>I know Cython and you do not have to know C...,False,False,False
16681,7451163,7466485.0,2011-09-16 22:55:27.807,0,143,<p>My Python High Replication Datastore applic...,GAE Lookup Table Incompatible with Transactions?,"python,google-app-engine,transactions,google-c...",2,0,False,False,False,7452303,2011-09-17 03:46:07.917,1,"<p>First, if you're under the belief that a na...",False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258713,66153527,66154139.0,2021-02-11 11:06:23.463000,0,84,<p>I am trying to load a pickle file that the ...,Loading a pickle file returns 'could not find ...,"python,pickle",1,6,False,True,False,66154139,2021-02-11 11:46:09.047000,0,<p>I had accidentally deleted <code>save_game_...,False,False,False
122890,23997403,,2014-06-02 14:50:34.593000,54,70352,"<p>Lots of other threads about similar issues,...",Installed virtualenv and virtualenvwrapper: Py...,"python,macos,virtualenv,virtualenvwrapper",10,2,False,True,False,44194250,2017-05-26 05:30:26.477000,7,<p>try installing virtualenv and virtualenvwra...,False,False,False
50859,55538285,55538436.0,2019-04-05 14:59:50.507,0,49,<p>Like if i want paragraph to be on monday. A...,Is there a way to display a view by date?,"python,django",3,2,False,False,False,55538379,2019-04-05 15:03:42.680,0,<ol>\n<li>regexp on url</li>\n<li>route in vie...,False,False,False
17944,8630573,8630649.0,2011-12-25 17:08:04.547,3,9002,<p>I have a web crawling python script running...,How to pause a python script running in terminal,"python,terminal,pausing-execution",6,2,False,False,False,8631087,2011-12-25 19:10:56.307,1,<p>Since you're populating a database with you...,False,False,False


In [8]:
preprocessing = DataPreprocessing(df)
preprocessing.preprocess_tags()
preprocessing.indexing_code()

In [9]:
preprocessing.df

Unnamed: 0,Id_Q,Title_Q,Body_Q,Tags_Q,Code_Q
82770,6841853,"accessing ""module scope"" vars","<p>I'm currently learning Python, and I have t...","[python, variables, module, scope, coding-style]",True
124453,24689331,no module named flask,"<p>I'm a beginner to flask, following the inst...","[python, flask]",True
255427,65341580,discord py - How do I check if a member has a ...,<p>I want to kick members that have a specific...,"[python, python-3.x, discord.py, discord.py-re...",True
11502,4180836,Using Cython for game development?,<p><strong>How practical would it be to use Cy...,"[python, c, cython]",False
16681,7451163,GAE Lookup Table Incompatible with Transactions?,<p>My Python High Replication Datastore applic...,"[python, google-app-engine, transactions, goog...",False
...,...,...,...,...,...
258713,66153527,Loading a pickle file returns 'could not find ...,<p>I am trying to load a pickle file that the ...,"[python, pickle]",True
122890,23997403,Installed virtualenv and virtualenvwrapper: Py...,"<p>Lots of other threads about similar issues,...","[python, macos, virtualenv, virtualenvwrapper]",True
50859,55538285,Is there a way to display a view by date?,<p>Like if i want paragraph to be on monday. A...,"[python, django]",False
17944,8630573,How to pause a python script running in terminal,<p>I have a web crawling python script running...,"[python, terminal, pausing-execution]",False


In [10]:
preprocessing.code_df

Unnamed: 0,Id_Q
82770,6841853
124453,24689331
255427,65341580
244058,62709539
135942,29742727
...,...
225804,58457790
70225,2291069
137436,30352616
258713,66153527


In [11]:
code_question_df = preprocessing.get_code_question_dataframe()

100it [00:00, 11160.11it/s]


In [13]:
preprocessing.df.iloc[0, 2]

'<p>I\'m currently learning Python, and I have to work on a Python 2.7 project.</p>\n\n<p>Accessing "module scope" variables in functions of the module itself is a bit confusing for me, and I didn\'t succeed in finding a satisfying way.</p>\n\n<p>My attempts so far:</p>\n\n<p><strong>Way 1:</strong></p>\n\n<p>my_module.py</p>\n\n<pre><code>my_global_var = None\n\ndef my_func():\n    global my_global_var\n    my_global_var = \'something_else\'\n</code></pre>\n\n<p>Here I think that confusing local and "module scope" vars may be quite easy.</p>\n\n<p><strong>Way 2:</strong></p>\n\n<p>my_module.py</p>\n\n<pre><code>import my_module\n\nmy_global_var = None\n\ndef my_func():\n    my_module.my_global_var = \'something_else\'\n</code></pre>\n\n<p>Here, the name of "my_module" could not be as easily changed as "way 1" when necessary. Plus, importing a module into itself sounds quite weird.</p>\n\n<p>What would you recommend? Or would you suggest something else? Thanks.</p>\n'

In [12]:
code_question_df

Unnamed: 0,code_id,code_snippet,question_id
0,0,my_global_var = None\n\ndef my_func():\n gl...,6841853
1,1,import my_module\n\nmy_global_var = None\n\nde...,6841853
2,0,$ mkdir myproject\n$ cd myproject\n$ virtualen...,24689331
3,0,if role in member.roles for 30 days:\n await...,65341580
4,0,import any_dict_01\nimport json\n\ndata = any_...,62709539
...,...,...,...
145,3,&lt;p&gt;The exit of the view was: {{ MyProjec...,30352616
146,4,"&lt;object width=""400"" height=""400"" data=""/pat...",30352616
147,0,from pprint import pprint\nimport random\nimpo...,66153527
148,0,export PATH=/usr/local/bin:$PATH\nexport PIP_D...,23997403


In [14]:
sampled_question_ids = random.sample(code_question_df['question_id'].unique().tolist(), 50)

In [15]:
sampled_question_ids

[56129786,
 51625542,
 20792049,
 24689331,
 59199224,
 25241692,
 7116526,
 23997403,
 13303449,
 55093747,
 33213057,
 61559413,
 58041059,
 46244850,
 39651222,
 23914370,
 70778175,
 12809466,
 58457790,
 13062306,
 57566344,
 17108924,
 72381032,
 44172637,
 54336776,
 6841853,
 30352616,
 71182388,
 33484107,
 2291069,
 30662920,
 45619741,
 10922134,
 14504990,
 53322171,
 5208436,
 16215605,
 62768787,
 26695219,
 7252681,
 65341580,
 64765578,
 61679864,
 32455262,
 9163640,
 22871871,
 22827671,
 44660433,
 46620035,
 28075108]

In [33]:
filtered_code_df = code_question_df[code_question_df['question_id'].isin(sampled_question_ids)]

In [34]:
filtered_code_df

Unnamed: 0,code_id,code_snippet,question_id
0,0,my_global_var = None\n\ndef my_func():\n gl...,6841853
1,1,import my_module\n\nmy_global_var = None\n\nde...,6841853
2,0,$ mkdir myproject\n$ cd myproject\n$ virtualen...,24689331
3,0,if role in member.roles for 30 days:\n await...,65341580
7,0,"def func(a, b):\n\n if a &gt;= 0 and b &gt; 0...",53322171
...,...,...,...
144,2,&lt;p&gt;The exit of the view was: &lt;?HTML_T...,30352616
145,3,&lt;p&gt;The exit of the view was: {{ MyProjec...,30352616
146,4,"&lt;object width=""400"" height=""400"" data=""/pat...",30352616
148,0,export PATH=/usr/local/bin:$PATH\nexport PIP_D...,23997403


In [37]:
tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base-multi-sum')
model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base-multi-sum')

In [38]:
filtered_code_df['code_description'] = ''
filtered_code_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_code_df['code_description'] = ''


Unnamed: 0,code_id,code_snippet,question_id,code_description
0,0,my_global_var = None\n\ndef my_func():\n gl...,6841853,
1,1,import my_module\n\nmy_global_var = None\n\nde...,6841853,
2,0,$ mkdir myproject\n$ cd myproject\n$ virtualen...,24689331,
3,0,if role in member.roles for 30 days:\n await...,65341580,
7,0,"def func(a, b):\n\n if a &gt;= 0 and b &gt; 0...",53322171,
...,...,...,...,...
144,2,&lt;p&gt;The exit of the view was: &lt;?HTML_T...,30352616,
145,3,&lt;p&gt;The exit of the view was: {{ MyProjec...,30352616,
146,4,"&lt;object width=""400"" height=""400"" data=""/pat...",30352616,
148,0,export PATH=/usr/local/bin:$PATH\nexport PIP_D...,23997403,


In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [42]:
filtered_code_df.reset_index(inplace=True, drop=True)

In [43]:
dataset = CodeDataset(filtered_code_df, tokenizer)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, collate_fn=DataCollatorWithPadding(tokenizer, padding=True), shuffle=False)

In [44]:
filtered_code_df

Unnamed: 0,code_id,code_snippet,question_id,code_description
0,0,my_global_var = None\n\ndef my_func():\n gl...,6841853,
1,1,import my_module\n\nmy_global_var = None\n\nde...,6841853,
2,0,$ mkdir myproject\n$ cd myproject\n$ virtualen...,24689331,
3,0,if role in member.roles for 30 days:\n await...,65341580,
4,0,"def func(a, b):\n\n if a &gt;= 0 and b &gt; 0...",53322171,
...,...,...,...,...
110,2,&lt;p&gt;The exit of the view was: &lt;?HTML_T...,30352616,
111,3,&lt;p&gt;The exit of the view was: {{ MyProjec...,30352616,
112,4,"&lt;object width=""400"" height=""400"" data=""/pat...",30352616,
113,0,export PATH=/usr/local/bin:$PATH\nexport PIP_D...,23997403,


In [45]:
for batch in tqdm(dataloader):
    with torch.no_grad():
        generated_ids = model.generate(batch["input_ids"].to(device), max_length=20)
    descriptions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    for i, description in enumerate(descriptions):
        index = int(batch['index'][i])
        filtered_code_df.loc[index, 'code_description'] = str(description)

100%|██████████| 8/8 [00:03<00:00,  2.05it/s]


In [46]:
filtered_code_df

Unnamed: 0,code_id,code_snippet,question_id,code_description
0,0,my_global_var = None\n\ndef my_func():\n gl...,6841853,This function is a wrapper around the global v...
1,1,import my_module\n\nmy_global_var = None\n\nde...,6841853,A function to define a function that will be c...
2,0,$ mkdir myproject\n$ cd myproject\n$ virtualen...,24689331,Create a missing system environment if it does...
3,0,if role in member.roles for 30 days:\n await...,65341580,Kick the member for 30 days.
4,0,"def func(a, b):\n\n if a &gt;= 0 and b &gt; 0...",53322171,Calculate the n - th index of a to n - th inde...
...,...,...,...,...
110,2,&lt;p&gt;The exit of the view was: &lt;?HTML_T...,30352616,Displays a message that shows the exit of the ...
111,3,&lt;p&gt;The exit of the view was: {{ MyProjec...,30352616,Exit of the view.
112,4,"&lt;object width=""400"" height=""400"" data=""/pat...",30352616,This is a simple example of how to render a si...
113,0,export PATH=/usr/local/bin:$PATH\nexport PIP_D...,23997403,Exports environment variables for n - node.


In [2]:
import pandas as pd

In [3]:
question_with_code_description = pd.read_csv('/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/final/question_with_code_description.csv')
question_with_code_description

Unnamed: 0,Id_Q,Title_Q,Body_Q,Tags_Q,Code_Q
0,1476,How do you express binary literals in Python?,<p>How do you express an integer as a binary n...,"['python', 'syntax', 'binary', 'integer', 'lit...",True
1,20927,Updating an auto_now DateTimeField in a parent...,<p>I've got two models: Message and Attachment...,"['python', 'database', 'django', 'orm']",True
2,59825,How to retrieve an element from a set without ...,<p>Suppose the following:</p>\n\ncode descript...,"['python', 'set']",True
3,61605,Is it pythonic for a function to return multip...,"<p>In python, you can have a function return m...","['python', 'function', 'return-value', 'multip...",True
4,65266,Caching compiled regex objects in Python?,<p>Each time a python file is imported that co...,"['python', 'regex', 'caching']",True
...,...,...,...,...,...
49990,72498913,"Pyngrok & Flask, Your account is limited to 1 ...",<p>I trying to run both flask and ngrok in the...,"['python', 'r', 'python-3.x', 'flask', 'pyngrok']",True
49991,72500796,How to fix ModuleNotFoundError: No module name...,<p>I have currently installed the terra_sdk==2...,"['python', 'terra']",True
49992,72502170,How to drop duplicates ignoring one column,<p>I have a <code>DataFrame</code> with multip...,"['python', 'pandas']",True
49993,72502242,"django.db.utils.ProgrammingError: relation ""ap...",<p>After I deleted all the migration files and...,"['python', 'django', 'django-migrations']",True


In [6]:
import ast

In [5]:
question_with_code_description['Tags_Q'][0]

"['python', 'syntax', 'binary', 'integer', 'literals']"

In [7]:
question_with_code_description['Tags_Q'] = question_with_code_description['Tags_Q'].apply(lambda x: ast.literal_eval(x)) #convert to list type

In [8]:
question_with_code_description['Tags_Q'][0]

['python', 'syntax', 'binary', 'integer', 'literals']

In [9]:
exploded_df = question_with_code_description.explode('Tags_Q')
exploded_df.head()

Unnamed: 0,Id_Q,Title_Q,Body_Q,Tags_Q,Code_Q
0,1476,How do you express binary literals in Python?,<p>How do you express an integer as a binary n...,python,True
0,1476,How do you express binary literals in Python?,<p>How do you express an integer as a binary n...,syntax,True
0,1476,How do you express binary literals in Python?,<p>How do you express an integer as a binary n...,binary,True
0,1476,How do you express binary literals in Python?,<p>How do you express an integer as a binary n...,integer,True
0,1476,How do you express binary literals in Python?,<p>How do you express an integer as a binary n...,literals,True


In [12]:
val = pd.read_csv('/home/st-aleksandr-razin/workspace/SRC_QC4QA/QC_pipeline/dataset/train_vsl_data/val.csv')

In [25]:
for j, i in enumerate(question_with_code_description_raw['Title_Q'].values):
    if 'Update the view less often' in i:
        print(j)
        break

7368


In [19]:
question_with_code_description_raw = pd.read_csv('/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/final/question_with_code_description_raw.csv')

In [26]:
print(question_with_code_description_raw.iloc[7368]['Body_Q'])

<p>I developed a program in a QMainWindow, with a QTableView based on a QSqlTableModel. The database uses sqlite. </p>

<p>This program is supposed to display a list of videos. I can normally add some tags and actor to each video (tags and actors are stored in the database). When I have a few videos displayed, everything works fine, but when the number of videos increases, my program raises an exception like that:</p>

<pre><code>Traceback (most recent call last):
File "/home/djipey/informatique/python/bibli/gui.py", line 572, in addTag
liste.addTag(vids_selected, tag_to_add.split(" "))
File "/home/djipey/informatique/python/bibli/liste.py", line 488, in addTag
bdd.commit()
sqlite3.OperationalError: database is locked
</code></pre>

<p>I'm almost sure the problem doesn't come from liste.py, because I close each connection and cursor. No, my database is busy somewhere else.</p>

<p>I assume the problem comes from the view/model when they display the informations, so I wonder if it's pos

In [24]:
print(val.iloc[-1]['Text'])

title: Update the view less often
question: <p>I developed a program in a QMainWindow, with a QTableView based on a QSqlTableModel. The database uses sqlite. </p>

<p>This program is supposed to display a list of videos. I can normally add some tags and actor to each video (tags and actors are stored in the database). When I have a few videos displayed, everything works fine, but when the number of videos increases, my program raises an exception like that:</p>

code description start: Traceback is called when the last call to addTag
 is called. code description end

<p>I'm almost sure the problem doesn't come from liste.py, because I close each connection and cursor. No, my database is busy somewhere else.</p>

<p>I assume the problem comes from the view/model when they display the informations, so I wonder if it's possible to dicrease the display rate? Otherwise, I'm open to every suggestion to fix the problem.</p>

code description start: Add a tag to list_tags. code description end

In [10]:
tags_count = exploded_df['Tags_Q'].value_counts()

In [11]:
for i, tag in zip(tags_count.index, tags_count):
    print(i, tag)

python 45884
python-3.x 6713
django 4597
python-2.7 2683
pandas 2451
numpy 1596
list 1296
tensorflow 1199
tkinter 1076
flask 1019
dictionary 807
keras 781
pip 736
regex 721
string 713
dataframe 702
selenium 684
json 660
machine-learning 632
matplotlib 615
mysql 613
csv 590
scikit-learn 574
linux 554
arrays 553
function 540
google-app-engine 506
class 495
sockets 494
pygame 493
multithreading 493
django-models 486
opencv 484
for-loop 455
windows 452
html 441
python-requests 421
loops 404
sqlalchemy 402
macos 402
algorithm 360
anaconda 351
sqlite 346
deep-learning 343
postgresql 342
pycharm 342
beautifulsoup 341
file 330
import 327
subprocess 326
javascript 324
web-scraping 323
jupyter-notebook 321
sql 319
amazon-web-services 317
multiprocessing 312
scipy 306
c++ 304
django-rest-framework 289
docker 281
neural-network 280
if-statement 276
kivy 275
ubuntu 272
performance 268
recursion 266
module 263
selenium-webdriver 262
oop 257
virtualenv 257
datetime 256
scrapy 250
discord.py 247
djang

In [7]:
question_with_code_description_raw = pd.read_csv('/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/final/question_with_code_description_raw.csv')
question_with_code_description_raw

Unnamed: 0,Id_Q,Title_Q,Body_Q,Tags_Q,Code_Q
0,1476,How do you express binary literals in Python?,<p>How do you express an integer as a binary n...,"['python', 'syntax', 'binary', 'integer', 'lit...",True
1,20927,Updating an auto_now DateTimeField in a parent...,<p>I've got two models: Message and Attachment...,"['python', 'database', 'django', 'orm']",True
2,59825,How to retrieve an element from a set without ...,<p>Suppose the following:</p>\n\n<pre><code>&g...,"['python', 'set']",True
3,61605,Is it pythonic for a function to return multip...,"<p>In python, you can have a function return m...","['python', 'function', 'return-value', 'multip...",True
4,65266,Caching compiled regex objects in Python?,<p>Each time a python file is imported that co...,"['python', 'regex', 'caching']",True
...,...,...,...,...,...
49990,72498913,"Pyngrok & Flask, Your account is limited to 1 ...",<p>I trying to run both flask and ngrok in the...,"['python', 'r', 'python-3.x', 'flask', 'pyngrok']",True
49991,72500796,How to fix ModuleNotFoundError: No module name...,<p>I have currently installed the terra_sdk==2...,"['python', 'terra']",True
49992,72502170,How to drop duplicates ignoring one column,<p>I have a <code>DataFrame</code> with multip...,"['python', 'pandas']",True
49993,72502242,"django.db.utils.ProgrammingError: relation ""ap...",<p>After I deleted all the migration files and...,"['python', 'django', 'django-migrations']",True


In [11]:
for i in question_with_code_description['Body_Q'].values[1:]:
    print(i)
    break

<p>I've got two models: Message and Attachment. Each attachment is attached to a specific message, using a ForeignKey on the Attachment model. Both models have an auto_now DateTimeField called updated. I'm trying to make it so that when any attachment is saved, it also sets the updated field on the associated message to now. Here's my code:</p>

code description start: Save attachment to the database. code description end

<p>Will this work, and if you can explain it to me, why? If not, how would I accomplish this?</p>



In [12]:
for i in question_with_code_description_raw['Body_Q'].values[1:]:
    print(i)
    break

<p>I've got two models: Message and Attachment. Each attachment is attached to a specific message, using a ForeignKey on the Attachment model. Both models have an auto_now DateTimeField called updated. I'm trying to make it so that when any attachment is saved, it also sets the updated field on the associated message to now. Here's my code:</p>

<pre><code>def save(self):
    super(Attachment, self).save()
    self.message.updated = self.updated
</code></pre>

<p>Will this work, and if you can explain it to me, why? If not, how would I accomplish this?</p>



In [13]:
from datasets import load_dataset

In [15]:
question_with_code_description_raw.to_json('/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/final/question_without_code_description.json', orient='records')

dataset = load_dataset(
    "json",
    data_files={
        "train": "/home/st-aleksandr-razin/workspace/SRC_QC4QA/data/summarization/final/question_without_code_description.json",
    },
)

Downloading and preparing dataset json/default to /home/st-aleksandr-razin/.cache/huggingface/datasets/json/default-85e08b30374667f5/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /home/st-aleksandr-razin/.cache/huggingface/datasets/json/default-85e08b30374667f5/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]