# Agrupación por años

In [4]:
import pandas as pd
# Path to your CSV file, assuming it's in the same directory as the notebook
file_path = 'sps_X_year_X_rq_X_topic.csv'
# Read the CSV file into a pandas DataFrame
df = pd.read_csv(file_path)
# Display the first 5 rows of the DataFrame
df

Unnamed: 0,RQ,Topics,2015,2011,2010,2023,2022,2019,2017,2016,...,2005,1999,2000,2002,2004,1998,1996,2001,Unnamed: 30,2025
0,RQ1,ARTIFICIAL INTELLIGENCE,,SPS105,SPS106,SPS021 SPS044,,,SPS111,,...,,,,,,,,,,
1,RQ1,CLOUD COMPUTING,SPS009 SPS012,SPS051 SPS065,SPS055 SPS058 SPS106,SPS021 SPS081 SPS112,,SPS011 SPS046 SPS053,SPS059,SPS015 SPS022,...,,,,,,,,,,
2,RQ1,CONTAINERIZATION,SPS108,,,SPS037 SPS038 SPS044 SPS081,SPS084 SPS085,SPS011 SPS053,SPS026,,...,,,,,,,,,,
3,RQ1,GRID COMPUTING,SPS002 SPS009 SPS074,SPS003 SPS008 SPS028 SPS065,SPS055 SPS061 SPS107,SPS112,SPS029,SPS046 SPS053,,SPS015 SPS066,...,SPS001,,,SPS014 SPS100,SPS052,,,,,
4,RQ1,HPC,SPS013 SPS074 SPS082,SPS065 SPS105,SPS077 SPS106,SPS037 SPS038 SPS112,SPS041 SPS042 SPS084 SPS085 SPS104,SPS046,SPS079 SPS094,SPS066,...,,,SPS071,,,,,,,
5,RQ1,JAVA,,,,,,,,,...,,,,,,,,,,
6,RQ1,VIRTUALIZATION,SPS108,SPS065,SPS088,SPS044 SPS081,SPS085,,,SPS022,...,,,,,,,,,,
7,RQ1,KUBERNETES,,,,SPS081,SPS027 SPS084,,,,...,,,,,,,,,,
8,RQ1,NETWORK,SPS002 SPS076,,,,,,,,...,,,,,SPS052,,,,,
9,RQ1,PARALLEL,SPS009,SPS007 SPS105,SPS061 SPS106,SPS038,SPS041,SPS046 SPS083,SPS026 SPS032,SPS066,...,,,SPS071,,,,SPS070,,,


# Tabla 13
### Agrupación de SPS por RQ X (Topicos & Grupos de 5 años)

In [9]:

# Identify columns that are years and fill empty values
year_columns = [col for col in df.columns if col.isdigit()]
df[year_columns] = df[year_columns].astype(str).replace('nan', '')

# Define the 5-year intervals
year_ranges = {
    '1993-2007': range(1993, 2009),
    '2008-2012': range(2008, 2013),
    '2013-2017': range(2013, 2018),
    '2018-2022': range(2018, 2023),
    '2023-2025': range(2023, 2026)
}

# Create a new DataFrame for the grouped data, preserving RQ and Topics
grouped_df = df[['RQ', 'Topics']].copy()

# Process each year range
for range_name, years in year_ranges.items():
    # Get the columns from the original DataFrame that fall into the current range
    cols_in_range = [str(year) for year in years if str(year) in df.columns]

    # Combine the data from these columns
    if cols_in_range:
        # Join the strings from the columns in the range, separated by a space
        # The result is cleaned to remove extra spaces
        grouped_df[range_name] = df[cols_in_range].apply(
            lambda row: ' '.join(row.dropna().astype(str)).strip(), axis=1
        ).str.split().str.join(' ')
    else:
        grouped_df[range_name] = ''

# Display the entire grouped DataFrame
grouped_df

Unnamed: 0,RQ,Topics,1993-2007,2008-2012,2013-2017,2018-2022,2023-2025
0,RQ1,ARTIFICIAL INTELLIGENCE,,SPS106 SPS105,SPS110 SPS111,SPS049 SPS073 SPS096 SPS109,SPS021 SPS044 SPS068
1,RQ1,CLOUD COMPUTING,,SPS078 SPS091 SPS092 SPS055 SPS058 SPS106 SPS0...,SPS018 SPS047 SPS050 SPS063 SPS030 SPS048 SPS0...,SPS054 SPS056 SPS097 SPS011 SPS046 SPS053 SPS0...,SPS021 SPS081 SPS112
2,RQ1,CONTAINERIZATION,,,SPS108 SPS026,SPS019 SPS035 SPS011 SPS053 SPS025 SPS034 SPS0...,SPS037 SPS038 SPS044 SPS081 SPS004
3,RQ1,GRID COMPUTING,SPS014 SPS100 SPS052 SPS001 SPS069 SPS006 SPS098,SPS006 SPS098 SPS017 SPS040 SPS060 SPS089 SPS0...,SPS018 SPS075 SPS048 SPS057 SPS062 SPS002 SPS0...,SPS064 SPS046 SPS053 SPS090 SPS029,SPS112
4,RQ1,HPC,SPS071 SPS006 SPS101 SPS103,SPS006 SPS101 SPS103 SPS017 SPS077 SPS106 SPS0...,SPS050 SPS075 SPS013 SPS074 SPS082 SPS066 SPS0...,SPS035 SPS073 SPS046 SPS010 SPS025 SPS034 SPS0...,SPS037 SPS038 SPS112 SPS068
5,RQ1,JAVA,,,,,SPS043
6,RQ1,VIRTUALIZATION,,SPS017 SPS088 SPS065,SPS062 SPS108 SPS022,SPS097 SPS034 SPS085,SPS044 SPS081
7,RQ1,KUBERNETES,,,,SPS034 SPS027 SPS084,SPS081 SPS004
8,RQ1,NETWORK,SPS052,,SPS047 SPS002 SPS076,,
9,RQ1,PARALLEL,SPS070 SPS071 SPS006 SPS103,SPS006 SPS103 SPS092 SPS061 SPS106 SPS007 SPS1...,SPS075 SPS110 SPS009 SPS066 SPS026 SPS032,SPS054 SPS046 SPS083 SPS010 SPS096 SPS025 SPS0...,SPS038 SPS043


### Tabla 13:  RQ1

In [7]:
# Filter for rows where the 'RQ' column is 'RQ1'
rq1_df = grouped_df[grouped_df['RQ'] == 'RQ1']

# Display the filtered DataFrame
rq1_df

Unnamed: 0,RQ,Topics,1993-1997,1998-2002,2003-2007,2008-2012,2013-2017,2018-2022,2023-2025
0,RQ1,ARTIFICIAL INTELLIGENCE,,,,SPS106 SPS105,SPS110 SPS111,SPS049 SPS073 SPS096 SPS109,SPS021 SPS044 SPS068
1,RQ1,CLOUD COMPUTING,,,,SPS078 SPS091 SPS092 SPS055 SPS058 SPS106 SPS0...,SPS018 SPS047 SPS050 SPS063 SPS030 SPS048 SPS0...,SPS054 SPS056 SPS097 SPS011 SPS046 SPS053 SPS0...,SPS021 SPS081 SPS112
2,RQ1,CONTAINERIZATION,,,,,SPS108 SPS026,SPS019 SPS035 SPS011 SPS053 SPS025 SPS034 SPS0...,SPS037 SPS038 SPS044 SPS081 SPS004
3,RQ1,GRID COMPUTING,,SPS014 SPS100,SPS052 SPS001 SPS069,SPS006 SPS098 SPS017 SPS040 SPS060 SPS089 SPS0...,SPS018 SPS075 SPS048 SPS057 SPS062 SPS002 SPS0...,SPS064 SPS046 SPS053 SPS090 SPS029,SPS112
4,RQ1,HPC,,SPS071,,SPS006 SPS101 SPS103 SPS017 SPS077 SPS106 SPS0...,SPS050 SPS075 SPS013 SPS074 SPS082 SPS066 SPS0...,SPS035 SPS073 SPS046 SPS010 SPS025 SPS034 SPS0...,SPS037 SPS038 SPS112 SPS068
5,RQ1,JAVA,,,,,,,SPS043
6,RQ1,VIRTUALIZATION,,,,SPS017 SPS088 SPS065,SPS062 SPS108 SPS022,SPS097 SPS034 SPS085,SPS044 SPS081
7,RQ1,KUBERNETES,,,,,,SPS034 SPS027 SPS084,SPS081 SPS004
8,RQ1,NETWORK,,,SPS052,,SPS047 SPS002 SPS076,,
9,RQ1,PARALLEL,SPS070,SPS071,,SPS006 SPS103 SPS092 SPS061 SPS106 SPS007 SPS1...,SPS075 SPS110 SPS009 SPS066 SPS026 SPS032,SPS054 SPS046 SPS083 SPS010 SPS096 SPS025 SPS0...,SPS038 SPS043


Tabla 13: RQ2

In [8]:
rq2_df = grouped_df[grouped_df['RQ'] == 'RQ2']

rq2_df

Unnamed: 0,RQ,Topics,1993-1997,1998-2002,2003-2007,2008-2012,2013-2017,2018-2022,2023-2025
12,RQ2,TEACHING,,,,SPS024,SPS057 SPS074 SPS032 SPS079,SPS109,
13,RQ2,RESEARCH,,SPS100,SPS052,SPS101 SPS114 SPS107,SPS110 SPS094 SPS111,SPS097 SPS096 SPS080 SPS090 SPS102 SPS084 SPS104,SPS037 SPS081 SPS099 SPS112
14,RQ2,EXTENSION,,,,,,SPS034 SPS085,SPS068
