In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial import distance

# Load the data
file_path = 'TOPSIS_income.xlsx'
df = pd.read_excel(file_path)

# Criteria columns for the analysis. No weights are applied, and every
# criterion is treated as "larger is better" (the ideal best is the column max).
data_columns = ['Households Mean income (dollars)', 'Families Mean income (dollars)',
                'Married-couple families Mean income (dollars)', 'Nonfamily households Mean income (dollars)']
scores = df[data_columns]

# Normalize each criterion column with min-max scaling so the columns are comparable
scaler = MinMaxScaler()
normalized_scores = scaler.fit_transform(scores)

# Calculate the ideal best (per-column max) and ideal worst (per-column min) solutions
positive_ideal_solution = normalized_scores.max(axis=0)
negative_ideal_solution = normalized_scores.min(axis=0)

# Calculate each row's Euclidean distance to the ideal best and worst solutions
distance_to_positive = distance.cdist(normalized_scores, [positive_ideal_solution], 'euclidean').flatten()
distance_to_negative = distance.cdist(normalized_scores, [negative_ideal_solution], 'euclidean').flatten()

# Relative closeness to the ideal solution: 1.0 means the row coincides with
# the ideal best, 0.0 means it coincides with the ideal worst.
relative_closeness = distance_to_negative / (distance_to_positive + distance_to_negative)

# Add the TOPSIS score to the original dataframe (later cells read this column from df)
df['TOPSIS Score'] = relative_closeness

# Sort the dataframe based on the TOPSIS score in descending order
ranked_df = df.sort_values(by='TOPSIS Score', ascending=False)

# Save the ranking only after sorting, so the "ranked" workbook really is in
# rank order (it was previously written before the sort, i.e. unranked).
ranked_df.to_excel('TOPSIS_income_ranked.xlsx', index=False)

# Show the top 5 states based on the TOPSIS score (head() defaults to 5 rows)
ranked_df[['State', 'TOPSIS Score']].head()


Unnamed: 0,State,TOPSIS Score
8,District of Columbia,1.0
21,Massachusetts,0.718122
30,New Jersey,0.686724
4,California,0.684457
6,Connecticut,0.663987


In [2]:
# Let the user add one row of data (including the state name) and compute its
# TOPSIS score against the existing rows, using the same steps as the ranking cell.

# Criteria columns for the analysis. Defined before the prompts below so this
# cell does not rely on a `data_columns` value left over from an earlier cell.
data_columns = ['Households Mean income (dollars)', 'Families Mean income (dollars)',
                'Married-couple families Mean income (dollars)', 'Nonfamily households Mean income (dollars)']

# Get the user input for the new data
new_data = []
new_data.append(input('Enter the state name: '))
for column in data_columns:
    # Re-prompt on a non-numeric entry instead of raising, so one typo does not
    # discard the values that were already entered.
    while True:
        raw_value = input(f'Enter the value for {column}: ')
        try:
            new_data.append(float(raw_value))
            break
        except ValueError:
            print(f'{raw_value!r} is not a number; please try again.')

# Append the new row. pd.concat returns a new frame, so `df` itself is left
# untouched; columns missing from new_row are filled with NaN for that row.
new_row = pd.DataFrame([new_data], columns=['State'] + data_columns)
df_new = pd.concat([df, new_row], ignore_index=True)

scores = df_new[data_columns]

# Normalize the data (the scaler is refit on all rows, including the new one)
scaler = MinMaxScaler()
normalized_scores = scaler.fit_transform(scores)

# Calculate the ideal best and worst solutions
positive_ideal_solution = normalized_scores.max(axis=0)
negative_ideal_solution = normalized_scores.min(axis=0)

# Calculate the distance to the ideal best and worst solutions
distance_to_positive = distance.cdist(normalized_scores, [positive_ideal_solution], 'euclidean').flatten()
distance_to_negative = distance.cdist(normalized_scores, [negative_ideal_solution], 'euclidean').flatten()

# Calculate the relative closeness to the ideal solution
relative_closeness = distance_to_negative / (distance_to_positive + distance_to_negative)

# Only write the new row's TOPSIS score into df_new; the scores already stored
# for the original rows are left unchanged.
# NOTE(review): the stored scores came from a scaler fit without the new row, so
# if the new values extend a column's min or max, the old and new scores are on
# slightly different scales - confirm this is acceptable.
row_index = df_new.index[-1]
df_new.loc[row_index,'TOPSIS Score'] = relative_closeness[-1]
print('The TOPSIS score for the new data is:', relative_closeness[-1])

The TOPSIS score for the new data is: 0.8024349009975507


In [3]:
# Sum the four mean-income columns row by row into a single total column on df.
df['Total Mean Income'] = (
    df['Households Mean income (dollars)']
    .add(df['Families Mean income (dollars)'])
    .add(df['Married-couple families Mean income (dollars)'])
    .add(df['Nonfamily households Mean income (dollars)'])
)

In [5]:
# Display the State column next to the summed mean-income column.
df.loc[:, ['State', 'Total Mean Income']]

Unnamed: 0,State,Total Mean Income
0,Alabama,349685
1,Alaska,449716
2,Arizona,418054
3,Arkansas,322111
4,California,541041
5,Colorado,497970
6,Connecticut,544249
7,Delaware,432670
8,District of Columbia,729776
9,Florida,413767
