In [1]:
# Stardew Valley Farm Info Aggregator
# Pulls n farms randomly from upload.farm and aggregates certain pertinent data points about each farm into a pandas dataframe
# then saves to csv

import requests as req
import pandas as pd
from bs4 import BeautifulSoup
import random
from time import sleep

In [2]:
# Sample `n` farms at random from the paginated upload.farm listing.
# Every successful draw appends [listing page number, position of the farm on
# that page, href of the farm link] to `slug_list`. A draw that fails for any
# reason is logged and skipped, so `slug_list` can end up shorter than `n`.
base_uri = "https://upload.farm/"
max_page = 16104       # highest listing page that is sampled; presumably the page count when this was written - TODO confirm
request_timeout = 30   # seconds; requests waits forever on a stalled connection unless a timeout is given
slug_list = []
n = 1200
for i in range(n):
    try:
        print(f'Randomly selecting farm {i+1} of {n}')
        rand_page = random.randint(1, max_page)
        full_uri = f'{base_uri}all?p={rand_page}'
        resp = req.get(full_uri, timeout=request_timeout)
        # Fail fast on 4xx/5xx instead of trying to parse an error page.
        resp.raise_for_status()
        resp_bs4 = BeautifulSoup(resp.text, features="html.parser")
        divTags = resp_bs4.body.find_all('div', attrs={'class':'col-md-4 col-sm-6 text-center previewbox'})
        if not divTags:
            # Without this guard the random draw below raises an opaque
            # "empty range" ValueError that hides the real cause.
            print(f'ran into an error: no farm previews found on page {rand_page}. continuing')
            continue
        # Zero-based position of the chosen preview box on the page.
        rand_entry = random.randrange(len(divTags))
        tag = divTags[rand_entry]
        link = tag.find_all('a')[0]
        slug_list.append([rand_page,rand_entry,link['href']])
    except Exception as e:
        # Best effort: one bad page must not abort the whole sampling run.
        print(f'ran into an error: {e}. continuing')
        continue


Randomly selecting farm 1 of 1200
Randomly selecting farm 2 of 1200
Randomly selecting farm 3 of 1200
Randomly selecting farm 4 of 1200
Randomly selecting farm 5 of 1200
Randomly selecting farm 6 of 1200
Randomly selecting farm 7 of 1200
Randomly selecting farm 8 of 1200
Randomly selecting farm 9 of 1200
Randomly selecting farm 10 of 1200
Randomly selecting farm 11 of 1200
Randomly selecting farm 12 of 1200
Randomly selecting farm 13 of 1200
Randomly selecting farm 14 of 1200
Randomly selecting farm 15 of 1200
Randomly selecting farm 16 of 1200
Randomly selecting farm 17 of 1200
Randomly selecting farm 18 of 1200
Randomly selecting farm 19 of 1200
Randomly selecting farm 20 of 1200
Randomly selecting farm 21 of 1200
Randomly selecting farm 22 of 1200
Randomly selecting farm 23 of 1200
Randomly selecting farm 24 of 1200
Randomly selecting farm 25 of 1200
Randomly selecting farm 26 of 1200
Randomly selecting farm 27 of 1200
Randomly selecting farm 28 of 1200
Randomly selecting farm 29 of

In [3]:
# Scrape the detail page of every sampled farm and fill one row of `final_df`
# per farm. A row whose page cannot be fetched or parsed gets "ERROR" in every
# scraped column, so a failed farm is never left half-populated.

# Columns filled in by the scraper (everything except the three sampling columns).
scraped_columns = ["user_name", "farm_name", "gender", "spouse", "pet_type", "pet_name", "total_earnings", "favorite_thing", "time_played", "farming_skill", "mining_skill", "foraging_skill", "fishing_skill", "combat_skill"]
# <dt> label in the info panel -> destination column.
info_columns = {
    'Gender': 'gender',
    'Favorite Thing': 'favorite_thing',
    'Total Earnings': 'total_earnings',
    'Time Played': 'time_played',
}
# <dt> label in the skills tab -> destination column.
skill_columns = {
    "Farming": 'farming_skill',
    "Mining": 'mining_skill',
    "Foraging": 'foraging_skill',
    "Fishing": 'fishing_skill',
    "Combat": 'combat_skill',
}
farm_request_timeout = 30  # seconds; requests waits forever on a stalled connection unless a timeout is given

final_df = pd.DataFrame(slug_list, columns=['page_index', 'page_element_index', 'farm_id'])
final_df[scraped_columns] = ''
# Number of farms actually sampled; this can be smaller than the requested `n`
# because failed draws are skipped when `slug_list` is built.
farm_count = len(final_df)

for index, row in final_df.iterrows():
    try:
        print(f'Pulling data for farm {index+1} of {farm_count}.')
        farm_uri = f'{base_uri}{row["farm_id"]}'
        farm_resp = req.get(farm_uri, timeout=farm_request_timeout)
        # Fail fast on 4xx/5xx instead of trying to parse an error page.
        farm_resp.raise_for_status()
        farm_resp_bs4 = BeautifulSoup(farm_resp.text, features="html.parser")

        # Both containers are required; a missing one raises IndexError and
        # the row is marked as ERROR by the handler below.
        summary_farmDivTag = farm_resp_bs4.find_all('div', attrs={'class':'info--summary-text'})[0]
        body_farmDivTag = farm_resp_bs4.find_all('div', attrs={'class':'info--body'})[0]

        # Info panel: gender, favorite thing, total earnings, time played.
        for info_div in body_farmDivTag.find_all('div'):
            label_tag = info_div.find('dt')
            value_tag = info_div.find('dd')
            if label_tag is None or value_tag is None:
                # Not a label/value pair - skip it.
                continue
            column = info_columns.get(label_tag.text)
            if column is None:
                continue
            value = value_tag.text.strip()
            if column == 'total_earnings':
                # Drop the final character (presumably a currency suffix -
                # TODO confirm) and the thousands separators.
                value = value[:-1].replace(',','')
            final_df.at[index, column] = value

        # Summary sentence: user name, spouse, pet type and pet name.
        summary_farmP = summary_farmDivTag.find('p')
        summary_farmB = summary_farmP.find_all('b')
        summary_farmP_text = summary_farmP.text
        pet_pos = summary_farmP_text.find('and has a pet')
        if pet_pos > 0:
            named_pos = summary_farmP_text.find('named', pet_pos)
            # NOTE(review): the +15 / +6 offsets are tuned to the sentence
            # layout of the page - confirm against a live page before changing.
            pet_type = summary_farmP_text[pet_pos+15:named_pos].strip()
            # Removes every '.' in the remainder, including any inside the name.
            pet_name = summary_farmP_text[named_pos+6:].strip().replace('.','')
            final_df.at[index, 'pet_type'] = pet_type
            final_df.at[index, 'pet_name'] = pet_name
        else:
            final_df.at[index, 'pet_type'] = "NA"
            final_df.at[index, 'pet_name'] = "NA"

        final_df.at[index, 'user_name'] = summary_farmB[0].text

        if summary_farmP_text.find('unmarried') > 0:
            final_df.at[index, 'spouse'] = "Unmarried"
        else:
            # The second <b> in the summary is used as the spouse name.
            final_df.at[index, 'spouse'] = summary_farmB[1].text

        # Skill levels.
        # NOTE(review): the element id is hard-coded; farms whose page has no
        # element with exactly this id keep empty skill columns - confirm the
        # id is stable across farm pages.
        skills_section = farm_resp_bs4.find('div', attrs={'id': 'skills-x5a1rb', 'class': 'tab-pane'})
        if skills_section:
            for skill in skills_section.find_all('dt'):
                try:
                    skill_name = skill.text.strip()
                    # The level is read from the pixeltext <span> inside the next <dd>.
                    skill_level_tag = skill.find_next('dd').find('span', class_='pixeltext')
                    skill_level = skill_level_tag.text.strip() if skill_level_tag else None

                    skill_column = skill_columns.get(skill_name)
                    if skill_column is not None:
                        final_df.at[index, skill_column] = skill_level
                except Exception as skill_error:
                    # One unreadable skill must not discard the rest of the farm.
                    print(f"Error extracting skill data: {skill_error}")
                    continue

        # Farm name: the title text minus its last five characters
        # (presumably a fixed suffix - TODO confirm).
        title_farmDivTag = farm_resp_bs4.find_all('div', attrs={'class':'title-title'})[0]
        final_df.at[index, 'farm_name'] = title_farmDivTag.text[:-5].strip()
    except Exception as e:
        print(f'ERROR: {e}. Continuing')
        # Overwrite every scraped column so values written before the failure
        # do not survive in a row that is otherwise marked as failed.
        for column in scraped_columns:
            final_df.at[index, column] = "ERROR"
print(final_df)



Pulling data for farm 1 of 1200.
Pulling data for farm 2 of 1200.
Pulling data for farm 3 of 1200.
Pulling data for farm 4 of 1200.
Pulling data for farm 5 of 1200.
Pulling data for farm 6 of 1200.
Pulling data for farm 7 of 1200.
Pulling data for farm 8 of 1200.
Pulling data for farm 9 of 1200.
Pulling data for farm 10 of 1200.
Pulling data for farm 11 of 1200.
Pulling data for farm 12 of 1200.
Pulling data for farm 13 of 1200.
Pulling data for farm 14 of 1200.
Pulling data for farm 15 of 1200.
Pulling data for farm 16 of 1200.
Pulling data for farm 17 of 1200.
Pulling data for farm 18 of 1200.
Pulling data for farm 19 of 1200.
Pulling data for farm 20 of 1200.
Pulling data for farm 21 of 1200.
Pulling data for farm 22 of 1200.
Pulling data for farm 23 of 1200.
Pulling data for farm 24 of 1200.
Pulling data for farm 25 of 1200.
Pulling data for farm 26 of 1200.
Pulling data for farm 27 of 1200.
Pulling data for farm 28 of 1200.
Pulling data for farm 29 of 1200.
Pulling data for farm 3

In [7]:
# Preview the first ten rows of the scraped frame.
# NOTE(review): the saved output of this cell has an "Unnamed: 0" column that the
# scraping cell never creates, and this cell's execution count is out of order -
# it looks like the frame was reloaded from the CSV before this ran. Re-run the
# notebook top to bottom to confirm.
final_df.head(10)

Unnamed: 0,page_index,page_element_index,farm_id,user_name,farm_name,gender,spouse,pet_type,pet_name,total_earnings,favorite_thing,time_played,farming_skill,mining_skill,foraging_skill,fishing_skill,combat_skill
0,3934,11,1L0cyu,Bahar,Des Hanim'in,Female,Unmarried,cat,Frernard,165911,Lego,33.8 hours,9.0,5.0,6.0,5.0,3.0
1,3086,17,1LGm6s,溪民,酿酒,Male,Unmarried,dog,瑜伽,13070,钱,1.6 hours,1.0,0.0,2.0,7.0,0.0
2,7077,11,1IyIEX,hyemin,NCT,Female,Harvey,dog,mark,2639333,mark,85.7 hours,10.0,8.0,10.0,10.0,7.0
3,1736,8,1NAJEQ,Александр,Теплый двор,Male,Penny,cat,Тишка,4695272,Мой член,72.3 hours,10.0,10.0,10.0,9.0,10.0
4,5484,9,1JwWuE,Karin,Primavera,Female,Unmarried,cat,Lídia,3852595,Morango,75.7 hours,10.0,9.0,10.0,10.0,7.0
5,6966,5,1IELgU,boi,testmultiplaye,Male,Unmarried,cat,Dödsoffer,15002628,cold ones,8.3 hours,1.0,1.0,0.0,0.0,1.0
6,8906,8,1H9CAr,Wayu,Pha&Mao,Male,Elliott,cat,Summao,2021078,He's my type,151.5 hours,,,,,
7,1781,9,1NvoCd,Bayu,Bima,Male,Unmarried,dog,Bolt,15666099,Natasya,63.3 hours,10.0,10.0,10.0,10.0,10.0
8,10084,11,1GlJsT,Fiona,Allegheny,Female,Unmarried,cat,Tuna,18606010,YouTube,59.6 hours,10.0,10.0,10.0,0.0,10.0
9,9344,13,1GSMvK,Nick,Wilderness,Male,Abigail,cat,Dudley,1915906,Television,85.7 hours,,,,,


In [5]:
# Print column dtypes and non-null counts for the scraped frame.
# The scraped columns are initialised to '' rather than NaN, so they show up as
# fully non-null object columns here even when nothing was scraped for a row.
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1198 entries, 0 to 1197
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   page_index          1198 non-null   int64 
 1   page_element_index  1198 non-null   int64 
 2   farm_id             1198 non-null   object
 3   user_name           1198 non-null   object
 4   farm_name           1198 non-null   object
 5   gender              1198 non-null   object
 6   spouse              1198 non-null   object
 7   pet_type            1198 non-null   object
 8   pet_name            1198 non-null   object
 9   total_earnings      1198 non-null   object
 10  favorite_thing      1198 non-null   object
 11  time_played         1198 non-null   object
 12  farming_skill       1198 non-null   object
 13  mining_skill        1198 non-null   object
 14  foraging_skill      1198 non-null   object
 15  fishing_skill       1198 non-null   object
 16  combat_skill        1198

In [6]:
# Persist the scraped frame. index=False keeps the RangeIndex out of the file;
# otherwise it is written as an unnamed first column and comes back as
# "Unnamed: 0" when the CSV is read again.
final_df.to_csv('stardewvalley_aggregator_data_complete.csv', index=False)
