## Installing and importing libs

In [None]:
!pip install BeautifulSoup4



In [None]:
import pandas as pd
import numpy as np
import requests
import json
import time


from bs4 import BeautifulSoup
from tqdm import tqdm
from time import sleep
from datetime import date

## Data acquisition steps:
1. Get hackathons data (superficial and detailed information)
2. Get project links from submission galleries and collect information about projects
3. Get information about participants

For data collection we use simple REST requests and the BeautifulSoup library, which allows us to parse HTML pages.

## 1. Get hackathons data

We send requests to Devpost to receive links to hackathons, along with some additional information such as prizes, start/end dates, themes, etc.

In [None]:
# Page through the Devpost hackathon listing (ended hackathons only) and accumulate
# every page's 'hackathons' entries into `hacks`.
# NOTE(review): paging starts at page=0; if the API serves the same content for
# page 0 and page 1 the first page is collected twice - confirm, and de-duplicate
# on 'id' if so.
hacks = []
for i in tqdm(range(2000)):  # 2000 is only an upper bound; we stop at the first empty page
    data = requests.get(
        'https://devpost.com/api/hackathons?page=' + str(i) + '&status[]=ended',
        timeout=60,  # never hang forever on a stalled connection
    )
    # Fail loudly on an HTTP error instead of crashing later with an opaque JSON error.
    data.raise_for_status()
    j_data = json.loads(data.text)
    if len(j_data['hackathons']) > 0:
        hacks += j_data['hackathons']
    else:
        break

In [None]:
# Flatten the raw hackathon records in `hacks` into one list per output column.
all_id = []
titles = []
locations = []
urls = []
start_dates = []
years = []
end_dates = []
themes = []
prizes = []
participants_reg = []
featured = []
organization_name = []
winners_announced = []
submission_gallery_url = []
start_a_submission_url = []

for h in hacks:
    # 'submission_period_dates' is one free-text string; it is cut on ',' and '-'.
    period = h['submission_period_dates']
    period_parts = period.rsplit(',')
    year = period_parts[1]                          # text after the first comma
    end_date = period_parts[0].rsplit('-')[-1]      # text after the last '-' before that comma
    # Whatever is left once year, end date and separators are removed is the start date.
    start_date = period.replace(year, '').replace(end_date, '').replace('-', '').replace(',', '')

    # 'prize_amount' is an HTML fragment: keep the text before the first '<span'
    # plus the text directly in front of the first '</span>'.
    # Read it from the current record `h` (not a fixed hacks[1]) so each row gets its own amount.
    prize_html = h['prize_amount']
    prize = prize_html.rsplit('<span')[0] + prize_html.rsplit('</span>')[0].rsplit('>')[-1]

    all_id.append(h['id'])
    titles.append(h['title'])
    locations.append(h['displayed_location']['location'])
    urls.append(h['url'])
    years.append(year)
    end_dates.append(end_date)
    start_dates.append(start_date)
    themes.append(', '.join([d['name'] for d in h['themes']]))
    prizes.append(prize)
    participants_reg.append(h['registrations_count'])
    featured.append(h['featured'])
    organization_name.append(h['organization_name'])
    winners_announced.append(h['winners_announced'])
    submission_gallery_url.append(h['submission_gallery_url'])
    start_a_submission_url.append(h['start_a_submission_url'])

In [None]:
# Combine the per-column lists into one table and persist it as general_data.csv.
d = {
    'Id': all_id,
    'Title': titles,
    'Location': locations,
    'url': urls,
    'start_date': start_dates,
    'end_date': end_dates,
    'year': years,
    'themes': themes,
    'prize': prizes,
    'registered_N': participants_reg,
    'featured': featured,
    'organization_name': organization_name,
    'winners_announced': winners_announced,
    'submission_gallery_url': submission_gallery_url,
    'start_a_submission_url': start_a_submission_url,
}
data = pd.DataFrame(data=d)
data.to_csv('general_data.csv', index=False)

From here on, each list of links is divided into separate chunks, so that we can collect information about the entities (hackathons, projects, participants) without overflowing the computer's memory. The information for each chunk is saved into a separate CSV file of length <= chunk size.

Chunk size varies from entity to entity.

In [None]:
chunk_size = 1000
num_chunks = len(urls) // chunk_size + 1

# Cut `urls` into consecutive slices of at most `chunk_size` items.
# A slice that runs past the end of the list is clamped, so the last
# (possibly shorter) chunk needs no special handling.
url_chunks = [
    urls[start:start + chunk_size]
    for start in tqdm(range(0, len(urls), chunk_size))
]

In [None]:
# For every hackathon URL scrape the detail page (schedule, info boxes, judging
# criteria, JSON-LD start/end dates, prizes) and write one CSV per chunk:
# data_<chunk index>.csv. Missing fields are stored as 'no ...' placeholder strings.
for i in tqdm(range(len(url_chunks))):
    # Dedicated name: rebinding the global `urls` here would leave it pointing at
    # a single chunk and break a re-run of the chunking cell.
    chunk_urls = url_chunks[i]
    criteria = []
    start_date_format = []
    end_date_format = []
    prize_header = []
    prize_money = []
    urls_2 = []
    hack_type = []
    all_info = []
    schedule = []

    for hack_url in tqdm(chunk_urls):
        test_detailed_data = requests.get(hack_url).text
        BS = BeautifulSoup(test_detailed_data, 'html.parser')
        urls_2.append(hack_url)

        # All handlers use `except Exception` rather than a bare `except` so a
        # keyboard/kernel interrupt still stops this long-running loop.

        # Schedule: follow the "view all dates" link and flatten its table rows.
        try:
            g = BS.find('a', {'class': "view-all-dates-link"})['href']
            schedule_data = requests.get(g).text
            schedule_BS = BeautifulSoup(schedule_data, 'html.parser')
            all_dates = schedule_BS.find(id='main').find('tbody').find_all('tr')
            schedule_text = ', '.join(tr.get_text().replace('\n', ' ').strip() for tr in all_dates)
        except Exception:
            schedule_text = 'no schedule'
        schedule.append(schedule_text)

        # Info boxes: the third div.info is stored as the hackathon type, all of them as 'info'.
        # Both values are computed before appending so the two lists always stay the same length.
        try:
            info_divs = BS.find_all('div', {'class': "info"})
            hack_type_text = info_divs[2].get_text().replace('\n', '').strip()
            all_info_text = ', '.join(div.get_text().replace('\n', '').strip() for div in info_divs)
        except Exception:
            hack_type_text = 'no type'
            all_info_text = 'no info'
        hack_type.append(hack_type_text)
        all_info.append(all_info_text)

        # JSON-LD metadata. Reset to an empty dict on failure: otherwise the value
        # parsed for the previous page would be reused and its dates written to this row.
        try:
            data_for_json = json.loads(BS.find(id="challenge-json-ld").contents[0])
        except Exception:
            data_for_json = {}

        # Judging criteria: the <strong> headings inside #judging-criteria.
        try:
            judging_criteria = BS.find(id="judging-criteria").find_all('strong')
            criteria_text = ', '.join(li.get_text().replace('\n', '').strip() for li in judging_criteria)
        except Exception:
            criteria_text = 'no criteria'
        criteria.append(criteria_text)

        # Start / end dates: keep the text before the first ':' and drop its last three
        # characters (presumably the 'Thh' hour part - confirm) to get an ISO date.
        try:
            start_date = data_for_json['startDate']
            start_value = date.fromisoformat(start_date.split(':')[0][:-3])
        except Exception:
            start_value = 'no start date'
        start_date_format.append(start_value)

        try:
            end_date = data_for_json['endDate']
            end_value = date.fromisoformat(end_date.split(':')[0][:-3])
        except Exception:
            end_value = 'no end date'
        end_date_format.append(end_value)

        # Prizes: <h6> titles and the first <strong> inside #prizes.
        prizes_section = BS.find(id="prizes")
        try:
            prize_header_text = ', '.join(
                header.get_text().replace('\n', '').strip() for header in prizes_section.find_all('h6')
            )
        except Exception:
            prize_header_text = 'no prizes'
        prize_header.append(prize_header_text)

        try:
            prize_money_text = prizes_section.find('strong').get_text()
        except Exception:
            prize_money_text = 'no money prize'
        prize_money.append(prize_money_text)

        time.sleep(1)  # pause between pages

    d_2 = {'URL': urls_2, 'Criteria': criteria, 'schedule': schedule, 'hack_type': hack_type,
           'info': all_info, 'start_date_format': start_date_format,
           'end_date_format': end_date_format, 'Prizes': prize_header, 'prize_money': prize_money}
    data_2 = pd.DataFrame(d_2)
    data_2.to_csv('data_' + str(i) + '.csv', index=False)

    time.sleep(30)  # longer pause between chunks

In [None]:
# Load the per-chunk files data_<i>.csv (one per entry of url_chunks)
# and merge them into a single CSV.
hacks_df = [
    pd.read_csv('data_' + str(i) + '.csv')
    for i in tqdm(range(len(url_chunks)))
]

hacks_df_all = pd.concat(hacks_df, ignore_index=True)
hacks_df_all.to_csv(r'/content/drive/MyDrive/Thesis/data/hacks_all.csv', index=False)

## 2. Get project links from submission galleries and collect information about projects

This is the first part: collect project URLs.

In [None]:
chunk_size_2 = 1000
num_chunks_2 = len(submission_gallery_url) // chunk_size_2 + 1

# Consecutive slices of at most `chunk_size_2` gallery URLs; slicing past the
# end is clamped, so the final chunk is simply shorter.
submission_gallery_url_chunks = [
    submission_gallery_url[start:start + chunk_size_2]
    for start in tqdm(range(0, len(submission_gallery_url), chunk_size_2))
]

In [None]:
# Walk every submission gallery page by page and record [gallery URL, project URL]
# pairs; one CSV per chunk of galleries: project_url_<chunk>.csv.

# Project links appear under either of these exact class strings; both are collected.
project_link_classes = [
    "block-wrapper-link fade link-to-software",
    "software-entry-link block-wrapper-link fade",
]
# Gallery notices that mean there is nothing (more) to collect.
end_of_gallery_messages = [
    "There are no submissions which match your criteria.",
    "The hackathon managers haven't published this gallery yet, but hang tight!",
]

for chunk in tqdm(range(len(submission_gallery_url_chunks))):
    submission_gallery_urls = submission_gallery_url_chunks[chunk]

    projects = []
    for url in tqdm(submission_gallery_urls):
        # NOTE(review): paging starts at page=0; if the site serves the same content for
        # page 0 and page 1 the first page is collected twice - confirm and de-duplicate.
        for i in tqdm(range(500)):  # 500 is only an upper bound on pages per gallery
            test_detailed_data = requests.get(str(url) + "?page=" + str(i)).text
            BS = BeautifulSoup(test_detailed_data, 'html.parser')

            sub_gal = BS.find(id="submission-gallery")
            if not sub_gal:
                break

            # find_all returns an empty list (it does not raise) when nothing matches,
            # so no exception handling is needed around these lookups.
            for link_class in project_link_classes:
                for el in BS.find_all(class_=link_class, href=True):
                    projects.append([url, el['href']])

            # The gallery may contain no <p> at all; guard so one odd page cannot
            # abort the chunk before its results are written to disk.
            notice = sub_gal.find('p')
            if notice is not None and notice.get_text().replace('\n', '').strip() in end_of_gallery_messages:
                break

    projects_df = pd.DataFrame(projects)
    projects_df.to_csv('project_url_' + str(chunk) + '.csv', index=False)

    time.sleep(10)  # pause between chunks

In [None]:
# Load the per-chunk files project_url_<i>.csv (one per gallery chunk)
# and merge them into a single CSV.
project_urls_df = [
    pd.read_csv('project_url_' + str(i) + '.csv')
    for i in tqdm(range(len(submission_gallery_url_chunks)))
]

project_urls_df_all = pd.concat(project_urls_df, ignore_index=True)
project_urls_df_all.to_csv(r'/content/drive/MyDrive/Thesis/data/project_urls_all.csv', index=False)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 11.67it/s]


Part two: collect project info (repository, likes, technologies used, etc.).

In [None]:
chunk_size_3 = 1000
num_chunks_3 = len(project_urls_df_all) // chunk_size_3 + 1

# Row-slice the project table into consecutive frames of at most `chunk_size_3`
# rows; a slice running past the last row is clamped automatically.
projects_url_chunks = [
    project_urls_df_all[start:start + chunk_size_3]
    for start in tqdm(range(0, len(project_urls_df_all), chunk_size_3))
]

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 261/261 [00:00<00:00, 18572.41it/s]


In [None]:
# For every project page collect GitHub links, team members, technologies,
# repository links, the hackathons it was submitted to and like/comment counts;
# one CSV per chunk: project_data_<chunk index>.csv.
# The string 'Nan' marks a field that could not be extracted.
for i in tqdm(range(len(projects_url_chunks))):
    # Each row is [gallery URL, project URL]; taking the first two values by position
    # avoids the deprecated label-vs-position fallback of row[0] / row[1].
    projects = [row[:2] for row in projects_url_chunks[i].values.tolist()]
    github_links = []
    participants = []
    participants_num = []
    build_with = []
    repo_link = []
    repo = []
    submitted_to_link = []
    submitted_to_name = []
    submitted_to_hacks_num = []
    likes = []
    comments = []

    for url_submission, url_project in tqdm(projects):
        test_detailed_data = requests.get(url_project).text
        BS = BeautifulSoup(test_detailed_data, 'html.parser')

        # Every field is computed first and appended once afterwards, so a failure can
        # never leave the column lists with different lengths. `except Exception` (not a
        # bare `except`) keeps keyboard/kernel interrupts working.

        # Any link on the page that points at github.com.
        try:
            github_text = ', '.join(
                el['href'] for el in BS.find_all(href=True) if 'github.com' in el['href']
            )
        except Exception:
            github_text = 'Nan'
        github_links.append(github_text)

        # Team members: unique profile links inside #app-team.
        try:
            team_links = BS.find(id="app-team").find_all(class_="user-profile-link")
            unique_members = np.unique([el['href'] for el in team_links])
            participants_text = ', '.join(unique_members)
            participants_count = len(unique_members)
        except Exception:
            participants_text = 'Nan'
            participants_count = 'Nan'
        participants.append(participants_text)
        participants_num.append(participants_count)

        # "Built with" technology tags.
        try:
            build_with_text = ', '.join(el.get_text() for el in BS.find_all(class_="cp-tag"))
        except Exception:
            build_with_text = 'Nan'
        build_with.append(build_with_text)

        # Repository / external links listed in the "app-links section" block.
        try:
            temp_repo_link = []
            temp_repo = []
            for el in BS.find(class_="app-links section").find_all('li'):
                # Read from the current <li> (not always item 0) so each link is captured once.
                anchor = el.find('a')
                label = el.find('span')
                if anchor is not None and anchor.get('href'):
                    temp_repo_link.append(anchor['href'])
                if label is not None:
                    rep_name = label.get_text().replace('\n', '').strip()
                    if rep_name:
                        temp_repo.append(rep_name)
            repo_link_text = ', '.join(temp_repo_link)
            repo_text = ', '.join(temp_repo)
        except Exception:
            repo_link_text = 'Nan'
            repo_text = 'Nan'
        repo_link.append(repo_link_text)
        repo.append(repo_text)

        # Hackathons this project was submitted to (#submissions list items).
        try:
            temp_submitted_to_link = []
            temp_submitted_to_name = []
            for el in BS.find(id="submissions").find_all('li'):
                a = el.find('a')
                p = el.find('p')
                if a and p:
                    href = a['href']
                    name = p.get_text().replace('\n', '').strip()
                    if href:
                        temp_submitted_to_link.append(href)
                    if name:
                        temp_submitted_to_name.append(name)
            submitted_link_text = ', '.join(temp_submitted_to_link)
            submitted_name_text = ', '.join(temp_submitted_to_name)
            submitted_count = len(np.unique(temp_submitted_to_link))
        except Exception:
            submitted_link_text = 'Nan'
            submitted_name_text = 'Nan'
            submitted_count = ''
        submitted_to_link.append(submitted_link_text)
        submitted_to_name.append(submitted_name_text)
        submitted_to_hacks_num.append(submitted_count)

        # Like / comment counters: first and second span.side-count; '0' when absent.
        try:
            like_comm = BS.find_all('span', {'class': "side-count"})
            likes_text = like_comm[0].get_text().replace('\n', '').strip() if len(like_comm) > 0 else '0'
            comments_text = like_comm[1].get_text().replace('\n', '').strip() if len(like_comm) > 1 else '0'
        except Exception:
            likes_text = '0'
            comments_text = '0'
        likes.append(likes_text)
        comments.append(comments_text)

    d_3 = {'submission_gal_url': [l[0] for l in projects], 'project_URL': [l[1] for l in projects],
           'github_links': github_links, 'participants': participants,
           'participants_num': participants_num, 'build_with': build_with, 'repo_link': repo_link,
           'repo': repo, 'submitted_to_link': submitted_to_link,
           'submitted_to_name': submitted_to_name, 'submitted_to_hacks_num': submitted_to_hacks_num,
           'likes': likes, 'comments': comments}

    data_3 = pd.DataFrame(d_3)
    data_3.to_csv('project_data_' + str(i) + '.csv', index=False)

    time.sleep(30)  # pause between chunks

  0%|                                                                                                                        | 0/205 [00:00<?, ?it/s]
  0%|                                                                                                                       | 0/1000 [00:00<?, ?it/s][A
  0%|                                                                                                               | 1/1000 [00:01<16:47,  1.01s/it][A
  0%|▏                                                                                                              | 2/1000 [00:01<15:09,  1.10it/s][A
  0%|▎                                                                                                              | 3/1000 [00:02<14:20,  1.16it/s][A
  0%|▍                                                                                                              | 4/1000 [00:03<14:48,  1.12it/s][A
  0%|▌                                                                               

                                    submission_gal_url  \
0    https://progress-worthyweb.devpost.com/project...   
1    https://progress-worthyweb.devpost.com/project...   
2    https://progress-worthyweb.devpost.com/project...   
3    https://progress-worthyweb.devpost.com/project...   
4    https://progress-worthyweb.devpost.com/project...   
..                                                 ...   
995  https://alexamultimodal.devpost.com/project-ga...   
996  https://alexamultimodal.devpost.com/project-ga...   
997  https://alexamultimodal.devpost.com/project-ga...   
998  https://alexamultimodal.devpost.com/project-ga...   
999  https://alexamultimodal.devpost.com/project-ga...   

                                           project_URL  \
0            https://devpost.com/software/munch-z05pux   
1        https://devpost.com/software/safetyapp-wh21yx   
2    https://devpost.com/software/cambidge-resilien...   
3                  https://devpost.com/software/foodcy   
4         htt

  0%|▌                                                                                                           | 1/205 [14:50<50:28:40, 890.79s/it]
  0%|                                                                                                                       | 0/1000 [00:00<?, ?it/s][A
  0%|                                                                                                               | 1/1000 [00:00<13:47,  1.21it/s][A
  0%|▏                                                                                                              | 2/1000 [00:01<16:01,  1.04it/s][A
  0%|▎                                                                                                              | 3/1000 [00:02<14:32,  1.14it/s][A
  0%|▍                                                                                                              | 4/1000 [00:03<14:21,  1.16it/s][A
  0%|▌                                                                               

                                    submission_gal_url  \
0    https://alexamultimodal.devpost.com/project-ga...   
1    https://alexamultimodal.devpost.com/project-ga...   
2    https://alexamultimodal.devpost.com/project-ga...   
3    https://alexamultimodal.devpost.com/project-ga...   
4    https://alexamultimodal.devpost.com/project-ga...   
..                                                 ...   
995  https://alexalifehacks.devpost.com/project-gal...   
996  https://alexalifehacks.devpost.com/project-gal...   
997  https://alexalifehacks.devpost.com/project-gal...   
998  https://alexalifehacks.devpost.com/project-gal...   
999  https://alexalifehacks.devpost.com/project-gal...   

                                           project_URL  \
0    https://devpost.com/software/alexa-skill-story...   
1        https://devpost.com/software/netflix-roulette   
2    https://devpost.com/software/job-explorer-for-...   
3     https://devpost.com/software/ap-boss-alexa-skill   
4          ht

  1%|█                                                                                                           | 2/205 [29:52<50:36:26, 897.47s/it]
  0%|                                                                                                                       | 0/1000 [00:00<?, ?it/s][A
  0%|                                                                                                               | 1/1000 [00:00<14:48,  1.12it/s][A
  0%|▏                                                                                                              | 2/1000 [00:01<14:01,  1.19it/s][A
  0%|▎                                                                                                              | 3/1000 [00:02<14:47,  1.12it/s][A
  0%|▍                                                                                                              | 4/1000 [00:03<14:50,  1.12it/s][A
  0%|▌                                                                               

                                    submission_gal_url  \
0    https://alexalifehacks.devpost.com/project-gal...   
1    https://alexalifehacks.devpost.com/project-gal...   
2    https://alexalifehacks.devpost.com/project-gal...   
3    https://alexalifehacks.devpost.com/project-gal...   
4    https://alexalifehacks.devpost.com/project-gal...   
..                                                 ...   
995    https://corvidbywix.devpost.com/project-gallery   
996    https://corvidbywix.devpost.com/project-gallery   
997    https://corvidbywix.devpost.com/project-gallery   
998    https://corvidbywix.devpost.com/project-gallery   
999    https://corvidbywix.devpost.com/project-gallery   

                                           project_URL github_links  \
0             https://devpost.com/software/laundry-cat                
1            https://devpost.com/software/privacy-wall                
2        https://devpost.com/software/ebay-bid-checker                
3    https://devpos

  1%|█▌                                                                                                          | 3/205 [44:54<50:27:27, 899.25s/it]
  0%|                                                                                                                       | 0/1000 [00:00<?, ?it/s][A
  0%|                                                                                                               | 1/1000 [00:00<16:23,  1.02it/s][A
  0%|▏                                                                                                              | 2/1000 [00:01<14:33,  1.14it/s][A
  0%|▎                                                                                                              | 3/1000 [00:02<13:58,  1.19it/s][A
  0%|▍                                                                                                              | 4/1000 [00:03<13:36,  1.22it/s][A
  0%|▌                                                                               

                                    submission_gal_url  \
0      https://corvidbywix.devpost.com/project-gallery   
1      https://corvidbywix.devpost.com/project-gallery   
2      https://corvidbywix.devpost.com/project-gallery   
3      https://corvidbywix.devpost.com/project-gallery   
4      https://corvidbywix.devpost.com/project-gallery   
..                                                 ...   
995  https://mlh-local-hack-day-2018.devpost.com/pr...   
996  https://mlh-local-hack-day-2018.devpost.com/pr...   
997  https://mlh-local-hack-day-2018.devpost.com/pr...   
998  https://mlh-local-hack-day-2018.devpost.com/pr...   
999  https://mlh-local-hack-day-2018.devpost.com/pr...   

                                           project_URL  \
0       https://devpost.com/software/artists-unlimited   
1    https://devpost.com/software/metropolitan-conc...   
2    https://devpost.com/software/best-deal-for-tra...   
3    https://devpost.com/software/poureux-corvid-ha...   
4          ht

  2%|██                                                                                                        | 4/205 [1:00:08<50:32:42, 905.29s/it]
  0%|                                                                                                                       | 0/1000 [00:00<?, ?it/s][A
  0%|                                                                                                               | 1/1000 [00:00<13:38,  1.22it/s][A
  0%|▏                                                                                                              | 2/1000 [00:01<14:54,  1.12it/s][A
  0%|▎                                                                                                              | 3/1000 [00:02<13:19,  1.25it/s][A
  0%|▍                                                                                                              | 4/1000 [00:03<13:12,  1.26it/s][A
  0%|▌                                                                               

                                    submission_gal_url  \
0    https://mlh-local-hack-day-2018.devpost.com/pr...   
1    https://mlh-local-hack-day-2018.devpost.com/pr...   
2    https://mlh-local-hack-day-2018.devpost.com/pr...   
3    https://mlh-local-hack-day-2018.devpost.com/pr...   
4    https://mlh-local-hack-day-2018.devpost.com/pr...   
..                                                 ...   
995   https://hackcovid-19.devpost.com/project-gallery   
996   https://hackcovid-19.devpost.com/project-gallery   
997   https://hackcovid-19.devpost.com/project-gallery   
998   https://hackcovid-19.devpost.com/project-gallery   
999   https://hackcovid-19.devpost.com/project-gallery   

                                           project_URL  \
0         https://devpost.com/software/career-guidance   
1     https://devpost.com/software/localhackday-c132i0   
2                 https://devpost.com/software/imatext   
3                  https://devpost.com/software/praxis   
4      https:

  2%|██▌                                                                                                       | 5/205 [1:15:21<50:26:11, 907.86s/it]
  0%|                                                                                                                       | 0/1000 [00:00<?, ?it/s][A
  0%|                                                                                                               | 1/1000 [00:01<21:25,  1.29s/it][A
  0%|▏                                                                                                              | 2/1000 [00:02<17:58,  1.08s/it][A
  0%|▎                                                                                                              | 3/1000 [00:03<19:22,  1.17s/it][A
  0%|▍                                                                                                              | 4/1000 [00:04<17:09,  1.03s/it][A
  0%|▌                                                                               

In [None]:
# Merge the per-chunk files project_data_<i>.csv into a single CSV.
# The files are written one per entry of `projects_url_chunks`, so that list
# (not the gallery chunk list) determines how many files to read.
projects_all_data_df = []
for i in tqdm(range(len(projects_url_chunks))):
    projects_all_data_df.append(pd.read_csv('project_data_' + str(i) + '.csv'))

all_projects_data = pd.concat(projects_all_data_df, ignore_index=True)
all_projects_data.to_csv(r'/content/drive/MyDrive/Thesis/data/all_projects_data.csv', index=False)

## 3. Get information about participants

Here we extract information about each hackathon participant. We are interested in personal data such as the person's name, country & skills.

In [None]:
# Build the array of distinct participant profile URLs from the comma-separated
# 'participants' column of the project table.
all_participants = pd.DataFrame(all_projects_data.participants)
all_participants = all_participants[~all_participants['participants'].isna()]
temp_participants = ', '.join(all_participants['participants'].tolist())
unique_participants = np.unique(temp_participants.split(', '))
# Remove the placeholders explicitly ('Nan' = team could not be scraped, '' = empty input)
# instead of dropping whichever value happens to sort first, which would discard a
# real profile URL whenever no placeholder is present.
unique_participants = unique_participants[(unique_participants != 'Nan') & (unique_participants != '')]

In [None]:
chunk_size_4 = 5000
num_chunks_4 = len(unique_participants) // chunk_size_4 + 1

# Consecutive slices of at most `chunk_size_4` participant URLs; the last
# slice is clamped to the end of the array and may be shorter.
participants_chunks = [
    unique_participants[start:start + chunk_size_4]
    for start in tqdm(range(0, len(unique_participants), chunk_size_4))
]

In [None]:
# For every participant profile URL collect name, skills, navigation info and
# location; one CSV per chunk: participants_data_<chunk index>.csv.
# The string 'Nan' marks a field that could not be extracted.
for i in tqdm(range(len(participants_chunks))):
    participants = participants_chunks[i]
    name = []
    skills = []
    info = []
    country = []

    for p in tqdm(participants):
        test_detailed_data = requests.get(p).text
        BS = BeautifulSoup(test_detailed_data, 'html.parser')

        # `except Exception` (not a bare `except`) keeps keyboard/kernel interrupts working.

        # Location: only read when the location icon is present; the first <li> of
        # #portfolio-user-links is then taken as the location text.
        country_text = 'Nan'
        try:
            if BS.find('span', {'class': "ss-icon ss-location"}):
                country_text = BS.find(id="portfolio-user-links").find('li').get_text().replace('\n', '').strip()
        except Exception:
            country_text = 'Nan'
        country.append(country_text)

        # Name: the heading text is split on '(' and the second-to-last piece is kept;
        # a heading without '(' yields 'Nan'.
        try:
            name_text = BS.find(id="portfolio-user-name").get_text().replace('\n', '').split('(')[-2].strip()
        except Exception:
            name_text = 'Nan'
        name.append(name_text)

        # Skills: iterate the direct <li> children of the tag list. Iterating the <ul>
        # itself would also yield the whitespace text nodes between the items.
        try:
            skills_list = BS.find('ul', {'class': "portfolio-tags no-bullet inline-list"})
            skills_text = ', '.join(
                li.get_text().replace('\n', '') for li in skills_list.find_all('li', recursive=False)
            )
        except Exception:
            skills_text = 'Nan'
        skills.append(skills_text)

        # Navigation entries of the profile (#portfolio-navigation list items).
        try:
            nav_items = BS.find(id="portfolio-navigation").find_all('li')
            info_text = ', '.join(li.get_text().replace('\n', '') for li in nav_items)
        except Exception:
            info_text = 'Nan'
        info.append(info_text)

    d_4 = {'name': name, 'skills': skills, 'info': info, 'country': country, 'url': participants}

    data_4 = pd.DataFrame(d_4)
    data_4.to_csv(r'/content/drive/MyDrive/Thesis/data/participants_data_' + str(i) + '.csv', index=False)

    time.sleep(30)  # pause between chunks

In [None]:
# Merge the per-chunk files participants_data_<i>.csv into a single CSV.
# The files are written one per entry of `participants_chunks`, so that list
# (not the gallery chunk list) determines how many files to read.
participants_all_data_df = []
for i in tqdm(range(len(participants_chunks))):
    participants_all_data_df.append(pd.read_csv(r'/content/drive/MyDrive/Thesis/data/participants_data_' + str(i) + '.csv'))

all_participants_data = pd.concat(participants_all_data_df, ignore_index=True)
all_participants_data.to_csv(r'/content/drive/MyDrive/Thesis/data/all_participants_data.csv', index=False)