In [1]:
import pandas as pd
import os

In [2]:
def concatenate_excel_files(input_folder):
    """
    Concatenate all Excel files in a folder into a single DataFrame.
    
    Args:
        input_folder (str): Path to the folder containing the Excel files.
    
    Returns:
        DataFrame: Concatenated DataFrame containing data from all Excel files.
    """
    # List to hold DataFrames
    all_dataframes = []

    # Loop through all Excel files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith('.xlsx') or file_name.endswith('.xls'):
            file_path = os.path.join(input_folder, file_name)
            
            # Read the Excel file
            df = pd.read_excel(file_path)
            all_dataframes.append(df)
            print(f"Loaded {file_name} with {len(df)} rows.")
    
    # Concatenate all DataFrames
    combined_df = pd.concat(all_dataframes, ignore_index=True)
    print(f"All files concatenated into a single DataFrame with {len(combined_df)} rows.")
    
    # Return the concatenated DataFrame
    return combined_df

In [3]:
# Specify the folder containing the Excel files
input_folder = "data/to_concatenate_st_game_performance_data_manual_download"  # Change to your folder path

# Run the function and get the concatenated DataFrame
combined_data = concatenate_excel_files(input_folder)

# Display the concatenated DataFrame (optional)
# print(combined_data)

Loaded Unified Top Apps Revenue  (Jan 1, 2023 - Dec 31, 2023, VN), Detailed (3).xlsx with 10000 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2018 - Dec 31, 2018, VN), Detailed (1).xlsx with 10000 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2014 - Dec 31, 2014, VN), Detailed (1).xlsx with 4671 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2016 - Dec 31, 2016, VN), Detailed (1).xlsx with 7276 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2021 - Dec 31, 2021, VN), Detailed (1).xlsx with 10000 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2019 - Dec 31, 2019, VN), Detailed (1).xlsx with 10000 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2022 - Dec 31, 2022, VN), Detailed (1).xlsx with 10000 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2015 - Dec 31, 2015, VN), Detailed (1).xlsx with 6101 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2020 - Dec 31, 2020, VN), Detailed (1).xlsx with 10000 rows.
Loaded Unified Top Apps Revenue  (Jan 1, 2017 - Dec 31, 2017, VN), Detailed (1).xlsx 

In [4]:
combined_data

Unnamed: 0,Unified Name,Unified ID,Unified Publisher Name,Unified Publisher ID,Date,Platform,Category,Downloads (Absolute),Downloads (Growth),Downloads (Growth %),"Revenue (Absolute, $)","Revenue (Growth, $)",Revenue (Growth %),Average DAU (Absolute),Average DAU (Growth),Average DAU (Growth %)
0,Arena of Valor,61a54e681800977bed4c561d,Garena Games Online,56180ad702ac645be5000013,2023-01-01,Unified,Action,14065022,-3344590,-0.19,6234277.21,-633320.38,-0.09,10573977.0,113064.0,0.01
1,Rise of Kingdoms,5ac2bdddcfc03208313848db,Lilith 莉莉丝,59418862660953716600e6f7,2023-01-01,Unified,Strategy,964015,-2295941,-0.70,5892819.33,-2602197.56,-0.31,116927.0,-33522.0,-0.22
2,Garena Free Fire,59d296dd60104c08ce201a27,Garena Games Online,56180ad702ac645be5000013,2023-01-01,Unified,Action,16850287,-2768294,-0.14,5873773.49,-794653.17,-0.12,4755329.0,-703303.0,-0.13
3,Coin Master,56e35cf19429946c4e0bac66,Moon Active,560c7e608ac350643902fee7,2023-01-01,Unified,Casual,9254039,8461558,10.68,3569386.06,1717741.75,0.93,212306.0,189547.0,8.33
4,Roblox,55d3a1a802ac64350a000d6e,Roblox Corporation,560c44d98ac3506439005015,2023-01-01,Unified,Adventure,15400901,5448689,0.55,3469873.38,860921.76,0.33,2572292.0,869982.0,0.51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97270,수련충 : 파괴의신에 도전하라,5b59110e2532232bd99ed3a8,DAERISOFT,56294d8a3f07e236f90419a7,2024-01-01,Unified,Role Playing,0,-1,-1.00,39.95,-22.30,-0.36,0.0,-12.0,-1.00
97271,Maestro: Dark Talent HD - A Musical Hidden Obj...,55d940fb02ac645ad217f20a,Pixel United,56289df502ac6486a7009de2,2024-01-01,Unified,Games,0,0,,39.94,-5.43,-0.12,,,
97272,My Coloring Pages Book Game,5603619102ac64dde000002a,BWEB,55f896188ac350426b045626,2024-01-01,Unified,Games,851,79,0.10,39.94,25.15,1.70,,,
97273,Barbearian,5b5ae720f33f835aa74226d0,Kimmo Lahtinen,56289ecf02ac6486a7015db0,2024-01-01,Unified,Games,0,0,,39.93,-34.00,-0.46,,,


In [5]:
sorted_combined_data = combined_data.sort_values(by='Date', ascending=False)

In [6]:
sorted_combined_data

Unnamed: 0,Unified Name,Unified ID,Unified Publisher Name,Unified Publisher ID,Date,Platform,Category,Downloads (Absolute),Downloads (Growth),Downloads (Growth %),"Revenue (Absolute, $)","Revenue (Growth, $)",Revenue (Growth %),Average DAU (Absolute),Average DAU (Growth),Average DAU (Growth %)
97274,Kvíz do kapsy,5f067598d871ea764a819b24,FORNEMI s.r.o.,666c9d88f399ed2390e3f106,2024-01-01,Unified,Trivia,0,0,,39.93,-37.28,-0.48,,,
90604,Talking Tom Time Rush,635372ab32237d5b65415f8a,Outfit7 金科文化,5d3aa5f49cc0f85f968d0731,2024-01-01,Unified,Action,2087997,566501,0.37,1471.43,1293.29,7.26,290037.0,172495.0,1.47
90611,Hidden Folks,58a469a80211a6cc1500058c,Adriaan de Jongh,55f895878ac350426b0429ab,2024-01-01,Unified,Games,2,2,,1453.51,103.44,0.08,,,
90610,Pawnbarian: a Puzzle Roguelike,627b22f00e9c981d5769d229,j4nw,627b22f00e9c981d5769d228,2024-01-01,Unified,Games,13655,1970,0.17,1453.77,223.85,0.18,213.0,45.0,0.27
90609,WildCraft: Animal Sim Online,5a68f22709b90f36e22b4d40,Vladimir Duchenchuk,5604a4d78ac3501722022f81,2024-01-01,Unified,Games,639911,238993,0.60,1457.19,-613.35,-0.30,6229.0,2132.0,0.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23116,Resimli Kelime Bulmaca,55d20e5b02ac64ee730006cb,Bretzel Games,5cc750557927800edc53a6ce,2014-01-01,Unified,Word,110,110,,3.95,3.95,,,,
23117,Black Metal Man,537b76ff830f782dbe00c0f9,SinSquid,5604943f8ac350172200fddc,2014-01-01,Unified,Music,0,0,,3.94,3.94,,,,
23118,Эврика! Логические Задачи Игры,55d20e5702ac64ee730005ef,Nekhoroshev Ilya,55f893ad8ac350426b039896,2014-01-01,Unified,Trivia,0,0,,3.93,3.93,,,,
23119,Cotton Candy! - Maker Games,55d93a3b02ac645ad203e6ef,Bluebear Technologies,5614dc003f07e25d29019275,2014-01-01,Unified,Games,4295,-3928,-0.48,3.93,-45.88,-0.92,,,


In [7]:
sorted_combined_data.to_excel('data/output/st_game_data_2014_2024.xlsx', index=False)