In [1]:
import pandas as pd

# Load the raw bridge table from disk.
df = pd.read_csv('data_full.csv')

# Keep only bridges whose construction year is known. The explicit .copy()
# detaches the filtered frame from the one it was sliced from, so the column
# assignment below does not trigger SettingWithCopyWarning.
df = df[df['Rok postavenia'] != 'Nezistený'].copy()
# Cast to float first and then to int.
# NOTE(review): presumably the years are stored as text such as "1985.0",
# which int() cannot parse directly - confirm against the CSV.
df['Rok postavenia'] = df['Rok postavenia'].astype(float).astype(int)

# Keep bridges built between 1700 and 2012 (both bounds inclusive).
df = df[df["Rok postavenia"].between(1700, 2012)]

print(f"Number of bridges in dataframe: {df.shape[0]}")

# Drop bridges without a known position.
# NOTE(review): a row is removed only when BOTH coordinates are "Nezistený";
# confirm whether a single unknown coordinate should also disqualify a bridge.
unknown_position = (df['Latitude'] == "Nezistený") & (df['Longitude'] == "Nezistený")
df = df.drop(index=df.index[unknown_position])
print(f"Reduction of unkown reduction, number of bridges: {df.shape[0]}")


# Yearly condition-rating columns; the filter below treats a value of 0 in
# them as "condition unknown".
columns_to_check = ['n_2012', 'n_2013', 'n_2014', 'n_2015', 'n_2016', 'n_2017', 'n_2018', 'n_2019', 'n_2020', 'n_2021', 'n_2022', 'n_2023']
# Count zeros ONLY in the rating columns. The previous version applied the
# counter to the whole row, so zeros in unrelated columns (for example the
# "Unnamed: 0" value of the first row) were counted as unknown ratings.
unknown_ratings = df[columns_to_check].eq(0).sum(axis=1)
# Keep bridges with at most three unknown ratings.
# NOTE(review): the message below says "three or more", but rows with exactly
# three zeros are kept; the text is left unchanged here - confirm the wording.
df = df[unknown_ratings <= 3]
print(f"Reduction of bridges with three or more uknown condition, number of bridges: {df.shape[0]}")
df

Number of bridges in dataframe: 7892
Reduction of unkown reduction, number of bridges: 7891
Reduction of bridges with three or more uknown condition, number of bridges: 7507


Unnamed: 0,ID mosta,2012,2013,2014,2015,2016,2017,2018,2019,2020,...,n_2014,n_2015,n_2016,n_2017,n_2018,n_2019,n_2020,n_2021,n_2022,n_2023
0,M5897,Dobrý,Dobrý,Dobrý,Dobrý,Dobrý,Zlý,Zlý,Zlý,Zlý,...,3,3,3,5,5,5,5,5,5,5
1,M1979,Dobrý,Dobrý,Dobrý,Dobrý,Dobrý,Veľmi zlý,Veľmi zlý,Veľmi zlý,Veľmi zlý,...,3,3,3,6,6,6,6,6,6,6
2,M6525,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,...,4,4,4,4,4,4,4,4,4,4
3,M4584,Dobrý,Dobrý,Dobrý,Dobrý,Dobrý,Dobrý,Dobrý,Dobrý,Dobrý,...,3,3,3,3,3,3,3,3,3,4
4,M3053,Dobrý,Dobrý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,Uspokojivý,...,4,4,4,4,4,4,4,5,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7959,M9491,Nezistený,Nezistený,Nezistený,Veľmi dobrý,Veľmi dobrý,Veľmi dobrý,Veľmi dobrý,Veľmi dobrý,Veľmi dobrý,...,0,2,2,2,2,2,2,3,3,3
7976,M8013,Nezistený,Nezistený,Nezistený,Bezchybný,Bezchybný,Bezchybný,Bezchybný,Bezchybný,Bezchybný,...,0,1,1,1,1,1,1,1,1,1
7980,M9378,Nezistený,Nezistený,Nezistený,Bezchybný,Bezchybný,Bezchybný,Veľmi dobrý,Veľmi dobrý,Veľmi dobrý,...,0,1,1,1,2,2,2,3,3,3
7981,M9373,Nezistený,Nezistený,Nezistený,Bezchybný,Bezchybný,Bezchybný,Veľmi dobrý,Veľmi dobrý,Veľmi dobrý,...,0,1,1,1,2,2,2,3,3,3


In [2]:
# Yearly rating columns, ordered from the most recent year (2023) back to 2012
columns_to_check = [f"n_{year}" for year in range(2023, 2011, -1)]

# Row-wise helper: fill zero entries from the neighbouring column
def replace_zeros(row, columns_to_check):
    """Replace zeros in ``row`` with the value of the preceding listed column.

    The columns are visited in the order given by ``columns_to_check``. A zero
    takes the (already filled) value of the column listed just before it, so a
    run of consecutive zeros is filled with the same value. A zero in the first
    listed column has no predecessor and is left as it is.

    Args:
        row: A pandas Series holding one DataFrame row; modified in place.
        columns_to_check: Ordered list of column labels to process.

    Returns:
        The same ``row`` object with the listed columns updated.
    """
    filled = []
    for position, value in enumerate(row[columns_to_check].tolist()):
        # Only positions after the first have a predecessor to copy from
        if position > 0 and value == 0:
            value = filled[position - 1]
        filled.append(value)

    # Write the filled values back into the row
    row[columns_to_check] = filled
    return row

# Run replace_zeros over every row, handing it the column order defined above
df = df.apply(lambda row: replace_zeros(row, columns_to_check), axis=1)


In [3]:
# Hard-coded replacement values for "Volná šírka mosta", keyed by bridge ID
width_values = {
    "M6601": 8.0,
    "M1352": 10.0,
    "M5485": 14.4,
    "M3102": 5.0,
    "M1661": 6.8,
    "M2186": 8.8,
    "M6863": 7.9,
    "M1319": 6.49,
    "M6987": 11.5,
    "M5196": 10.8,
    "M3616": 10.3,
    "M6312": 20.9,
    "M4142": 15.3,
    "M480": 9.5,
    "M6128": 8.5,
    "M2665": 9.0,
    "M1194": 8.0,
    "M4334": 10.0,
    "M8692": 8.5,
}

# Overwrite the stored value for every listed bridge
for bridge_id in width_values:
    is_target = df["ID mosta"].eq(bridge_id)
    df.loc[is_target, "Volná šírka mosta"] = width_values[bridge_id]

# Store the whole "Volná šírka mosta" column as float
df["Volná šírka mosta"] = df["Volná šírka mosta"].astype("float64")

In [4]:
# Hard-coded replacement values for "Dĺžka premostenia", keyed by bridge ID
length_values = {
    "M6312": 10.0,
    "M5485": 10.7,
    "M4353": 420.1,
    "M5196": 18.5,
    "M3616": 19.5,
    "M1352": 350.4,
    "M2665": 11.0,
    "M6128": 630.0,
    "M1194": 55.1,
    "M1661": 67.3,
}

# Overwrite the "Dĺžka premostenia" value for every listed bridge
for bridge_id in length_values:
    is_target = df["ID mosta"].eq(bridge_id)
    df.loc[is_target, "Dĺžka premostenia"] = length_values[bridge_id]

# Store the whole "Dĺžka premostenia" column as float
df["Dĺžka premostenia"] = df["Dĺžka premostenia"].astype("float64")

In [5]:
# Values of "Trieda PK" that are folded into "cesta III. triedy"
classes_to_reassign = ["miestna neurčená", "účelová cesta (UC)"]

# Select the affected bridges, then overwrite their "Trieda PK" value
needs_reassignment = df["Trieda PK"].isin(classes_to_reassign)
df.loc[needs_reassignment, "Trieda PK"] = "cesta III. triedy"

In [6]:
import numpy as np

# "n_length" is the larger and "n_width" the smaller of "Dĺžka premostenia"
# and "Volná šírka mosta", computed element-wise per bridge.
# np.maximum / np.minimum already return the shared value when both inputs are
# equal, so the former separate handling of the equal case was a no-op and has
# been removed.
df["n_length"] = np.maximum(df["Dĺžka premostenia"], df["Volná šírka mosta"])
df["n_width"] = np.minimum(df["Dĺžka premostenia"], df["Volná šírka mosta"])


In [7]:
# Values of "Kraj" listed in the order of their numeric codes (1 to 8)
region_names = [
    'Bratislava',
    'Trnava',
    'Trenčín',
    'Nitra',
    'Žilina',
    'Banská Bystrica',
    'Prešov',
    'Košice',
]
region_mapping = {name: code for code, name in enumerate(region_names, start=1)}

# Translate "Kraj" into its numeric code; values missing from the mapping become NaN
df["n_region"] = df["Kraj"].map(region_mapping)

In [8]:
# Numeric code for each "Trieda PK" value
class_mapping = dict([
    ("cesta III. triedy", 4),
    ("cesta I. triedy", 2),
    ("cesta II. triedy", 3),
    ("diaľnica", 1),
])

# Translate "Trieda PK" into "n_class"; values missing from the mapping become NaN
df["n_class"] = df["Trieda PK"].map(class_mapping)

In [9]:
# Hard-coded year overrides, keyed by bridge ID: eight bridges get 2005 ...
year_mapping = dict.fromkeys(
    ["M7405", "M6565", "M6585", "M7739", "M361", "M3370", "M5362", "M1660"],
    2005,
)
# ... and one bridge gets 1950
year_mapping["M6925"] = 1950

# "n_years" starts out as a copy of "Rok postavenia"
df["n_years"] = df["Rok postavenia"]

# Overwrite "n_years" for the bridges listed above
for bridge_id in year_mapping:
    is_target = df["ID mosta"].eq(bridge_id)
    df.loc[is_target, "n_years"] = year_mapping[bridge_id]

In [10]:
# Hard-coded corrections per bridge ID: (new "Material", new "Druh konštrukcie")
bridge_redefinitions = {
    "M361": ("prefabrikovaný predpätý betón", "dosková"),
    "M1661": ("monolitický predpätý betón", "trámová"),
    "M1767": ("monolitický železobetón", "dosková"),
    "M6599": ("prefabrikovaný predpätý betón", "dosková"),
    "M4334": ("monolitický predpätý betón", "trámová"),
}

# Overwrite both columns for each listed bridge, building the row selector once per bridge
for bridge_id, new_values in bridge_redefinitions.items():
    new_material, new_type = new_values
    is_target = df["ID mosta"] == bridge_id
    df.loc[is_target, "Material"] = new_material
    df.loc[is_target, "Druh konštrukcie"] = new_type


In [11]:
# Values of "Material" listed in the order of their numeric codes (1 to 13)
material_names = [
    'monolitický železobetón',
    'prefabrikovaný železobetón',
    'prefabrikovaný predpätý betón',
    'oceľ -  plnostenná',
    'monolitický prostý betón',
    'monolitický predpätý betón',
    'kameň',
    'tehla',
    'oceľobetón',
    'iný materiál',
    'oceľ -  priehradová',
    'spriahnutá betón-betón',
    'spriahnutá oceľ-betón',
]
material_mapping = {name: code for code, name in enumerate(material_names, start=1)}

# Translate "Material" into "n_material"; values missing from the mapping become NaN
df["n_material"] = df["Material"].map(material_mapping)


In [12]:
# Values of "Druh konštrukcie" listed in the order of their numeric codes (1 to 7)
type_names = ['dosková', 'trámová', 'klenbová', 'rámová', 'oblúková', 'iná', 'priehradová']
type_mapping = dict(zip(type_names, range(1, len(type_names) + 1)))

# Translate "Druh konštrukcie" into "n_type"; values missing from the mapping become NaN
df["n_type"] = df["Druh konštrukcie"].map(type_mapping)

In [14]:
# Three-way comparison of the 'n_2023' and 'n_2022' entries of a row
def compare_values(row):
    """Compare ``row['n_2023']`` with ``row['n_2022']``.

    Args:
        row: Any mapping-like object (e.g. a pandas Series) that provides the
            keys 'n_2023' and 'n_2022'.

    Returns:
        1 when the 2023 value is greater than the 2022 value, 0 when the two
        are equal, and -1 in every other case.
    """
    current, previous = row['n_2023'], row['n_2022']
    if current > previous:
        return 1
    return 0 if current == previous else -1

# Fill 'b_change' row by row; compare_values reads only these two columns
df['b_change'] = df[['n_2023', 'n_2022']].apply(compare_values, axis="columns")

In [15]:
# Write the processed table to disk, leaving the DataFrame index out of the file
df.to_csv(path_or_buf='condition_filled.csv', index=False)