In [67]:
import random
import pandas as pd
import numpy as np
import pickle
from deap import base, creator, tools, algorithms
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import warnings

warnings.filterwarnings("ignore")

with open(r"/Users/taejin/Desktop/데분 학회/급행버스/api 딕셔너리/getStationByRoute.pickle", 'rb') as file:
    getStationByRoute = pickle.load(file)

with open(r'/Users/taejin/Downloads/getStationByPosList.pickle', 'rb') as f:
    # 객체를 로드합니다.
    getStationByPosList = pickle.load(f)

with open(r'/Users/taejin/Desktop/데분 학회/급행버스/api 딕셔너리/노선별_정류장Kmeans정리.pickle', 'rb') as f:
    # 객체를 로드합니다.
    station_info = pickle.load(f)

od = pd.read_csv('/Users/taejin/Desktop/데분 학회/급행버스/od_weekday_2.csv')


In [68]:
def travel_time_by_station(getStationByRoute, route_name):
    df = getStationByRoute[route_name] # 피클에서 노선 정보 불러오기
    df['arsId'] = df['arsId'].astype(int)
    df = df[['busRouteNm', 'arsId', 'stationNm', 'seq', 'sectSpd', 'fullSectDist']] #필요한 행만 불러오기
    df['travelTime'] = df['fullSectDist'].astype(int) * 0.06/ 30 #travel time 열 추가 (30km, 0.06 분으로 변환하기 위한 값 60min/1000km)
    df['seq'] = df['seq'].astype(int) # 정류장 순서 타입 int로 변경

    return df

def passenger_onoff_num(od, route_name):
    od['승객수'] = od['승객수'].astype(int)
    od = od[od['노선명'] == route_name]
    boarding_data = od.groupby('승차_정류장순번')['승객수'].sum().reset_index()
    boarding_data.columns = ['정류장순번', '승차수']

    # Group by 하차_정류장순번 and sum 승객수
    alighting_data = od.groupby('하차_정류장순번')['승객수'].sum().reset_index()
    alighting_data.columns = ['정류장순번', '하차수']

    # Merge the boarding and alighting data on 정류장순번
    result = pd.merge(boarding_data, alighting_data, on='정류장순번', how='outer').fillna(0)
    result['승객총합'] = result['승차수'] + result['하차수']
    result['소요시간'] = result[['승차수', '하차수']].max(axis=1) * 2.3 /60 #승하차시 소요시간(분)
    #버스 10대/시간, 운행시간 18시간으로 나누어 버스 한대당 으로 변환
    result[['승차수', '하차수', '승객총합', '소요시간']] = result[['승차수', '하차수', '승객총합', '소요시간']]/180
    
    return result


def station_kmeans_result(station_df):
    scaler = MinMaxScaler()
    station_df[['dist', 'bus_cnt', '승하차총승객수']] = scaler.fit_transform(station_df[['dist', 'bus_cnt', '승하차총승객수']])

    # K-means 군집화
    kmeans = KMeans(n_clusters=3, random_state=42)
    station_df['cluster'] = kmeans.fit_predict(station_df[['dist', 'bus_cnt', '승하차총승객수']])

    # 높은 승객 수, 많은 버스 수, 짧은 거리의 정류장 선택
    selected_cluster = station_df.groupby('cluster')['승하차총승객수'].mean().idxmax(axis=0)

    # 급행 버스 정류장 선택
    express_stations = station_df[station_df['cluster'] == selected_cluster]

    return express_stations

def gene_algorithm_data(route_name): #유전 알고리즘을 위한 정류장별 데이터
    df1 = travel_time_by_station(getStationByRoute, route_name)
    df2 = passenger_onoff_num(od, route_name)
    df3 = station_kmeans_result(station_info[route_name])
    
    df3['cluster'] = 1
    df3 = dict(zip(df3['arsId'].astype(int), df3['cluster']))
    # display(df1)
    # display(df2)

    df1 = pd.merge(df1, df2, left_on = 'seq', right_on = '정류장순번', how = 'left')

    df1['cluster'] = df1['arsId'].map(df3)
    df1['cluster'].replace(np.nan, 0, inplace=True)

    df1 = df1[['arsId', 'seq', 'travelTime', '승차수', '하차수', '승객총합', '소요시간','cluster']]

    return df1

In [95]:
# 가중치 파라미터 (조정해야 하는 값)
a = 0.1
b = 2

# 정차 여부에 따른 승객 수 및 노선 소요 시간 계산 함수
def total_passenger(individual, df):
    df['cluster'] = individual
    passenger = (df['승객총합'] * df['cluster']).sum()
    return passenger

def travel_time(individual, df):
    df['cluster'] = individual
    # 구간 통행속도
    travel_time = df['travelTime'].sum()
    # 가감속 시간
    acc_time = (df['cluster'] * 11.6/60).sum()
    # 승하차시간
    onoff_time = (df['cluster'] * df['소요시간']).sum()
    travel_time = travel_time + acc_time + onoff_time
    return travel_time

# 적합도 함수
def fitness(individual):
    total_pass = total_passenger(individual, df)
    total_time = travel_time(individual, df)
    fit_value = + b * total_time - a * total_pass + sum(individual)
    return fit_value,

# # 개체를 초기화할 때 고정된 유전자를 설정하는 함수
# def initIndividual(icls, fixed_indices, fixed_values, size):
#     individual = [random.randint(0, 1) for _ in range(size)]
#     for idx, value in zip(fixed_indices, fixed_values):
#         individual[idx] = value
#     return icls(individual)

# 개체를 초기화할 때 고정된 유전자를 설정하고 1의 개수를 제한하는 함수
def initIndividual(icls, fixed_indices, fixed_values, size):
    individual = [0] * size
    num_ones = size // 2
    
    # 고정된 유전자 설정
    for idx, value in zip(fixed_indices, fixed_values):
        individual[idx] = value
        if value == 1:
            num_ones -= 1
    
    # 나머지 유전자 설정
    available_indices = [i for i in range(size) if i not in fixed_indices]
    ones_indices = random.sample(available_indices, num_ones)
    
    for idx in ones_indices:
        individual[idx] = 1
    
    return icls(individual)

In [87]:
def gene_algorithm(df, fixed_indices, fixed_values):
    num_stops = len(df)

    # 유전 알고리즘 설정
    if not hasattr(creator, 'FitnessMin'):
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))  # 적합도 클래스 생성

    if not hasattr(creator, 'Individual'):
        creator.create("Individual", list, fitness=creator.FitnessMin)  # 개체 클래스 생성


    # 유전 알고리즘 도구상자
    toolbox = base.Toolbox()
    toolbox.register("attr_bool", random.randint, 0, 1)  # 개체 유전자 초기화 속성
    toolbox.register("individual", initIndividual, creator.Individual, fixed_indices, fixed_values, num_stops)  # 개체 속성
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)  # 모집단 속성

    toolbox.register("evaluate", fitness)  # 평가 속성, fitness(적합도 함수) 입력 필요
    toolbox.register("mate", tools.cxOnePoint)  # 교차 속성

    # 고정된 유전자를 반영한 돌연변이 연산 정의
    def mutFlipBitFixed(individual, indpb, fixed_indices):
        for i in range(len(individual)):
            if i not in fixed_indices and random.random() < indpb:
                individual[i] = type(individual[i])(not individual[i])
        return individual,

    # 도구 상자에 돌연변이 연산 등록
    toolbox.register("mutate", mutFlipBitFixed, indpb=0.05, fixed_indices=fixed_indices)

    # 교차 연산 수정
    def cxOnePointFixed(ind1, ind2, fixed_indices):
        size = min(len(ind1), len(ind2))
        cxpoint = random.randint(1, size - 1)
        for i in range(size):
            if i not in fixed_indices and i < cxpoint:
                ind1[i], ind2[i] = ind2[i], ind1[i]
        return ind1, ind2

    toolbox.register("select", tools.selRoulette)

    # 초기 개체군 생성
    population = toolbox.population(n=50)

    # 유전 알고리즘 실행
    result_population = algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=1000,
                                            stats=None, halloffame=None, verbose=False)

    # 최적 해 찾기
    best_individual = tools.selBest(result_population[0], k=1)[0]
    print("Best Individual: ", best_individual)
    print("Fitness: ", fitness(best_individual)[0])

    return best_individual

In [96]:
route_names = ['143','150','172']


for name in route_names:

    df = gene_algorithm_data(name)
    
    # 고정된 유전자를 설정
    fixed_indices = list(df[df['cluster'] == 1].index)  # 예: 16, 17, 18, 26, 81, 89, 90, 91, 92 번째 유전자는 고정
    fixed_values = [1 for _ in range(len(fixed_indices))]   # 고정된 유전자의 값
    
    result = gene_algorithm(df, fixed_indices, fixed_values)
    exist = travel_time([1 for _ in range(len(df))], df)
    new = travel_time(result, df)

    print(f'{name}: {exist/60}, {new/60}')
    

Best Individual:  [1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
Fitness:  320.24361111111114
143: 2.554608487654321, 2.3351087962962964
Best Individual:  [0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1]
Fitness:  405.6376666666667
150: 2.95023950617284, 2.8163694444444447
Best Individual:  [0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 

In [92]:
df = gene_algorithm_data('172')
# len(df[df['cluster'] == 1])
df


Unnamed: 0,arsId,seq,travelTime,승차수,하차수,승객총합,소요시간,cluster
0,11489,1,0.000,0.405556,0.055556,0.461111,0.015546,0.0
1,11426,2,0.256,2.011111,0.016667,2.027778,0.077093,1.0
2,11454,3,0.628,2.616667,0.044444,2.661111,0.100306,1.0
3,11452,4,0.468,0.600000,0.111111,0.711111,0.023000,0.0
4,11339,5,0.672,0.855556,0.383333,1.238889,0.032796,0.0
...,...,...,...,...,...,...,...,...
99,11340,100,0.968,0.400000,0.705556,1.105556,0.027046,0.0
100,11453,101,0.836,0.050000,1.250000,1.300000,0.047917,0.0
101,11455,102,0.330,0.027778,2.516667,2.544444,0.096472,1.0
102,11425,103,0.576,0.011111,1.300000,1.311111,0.049833,0.0


In [56]:
exist = travel_time([1 for _ in range(113)], df)
new = travel_time(result, df)

print(f'{name}: {exist/60}, {new/60}')

2.554608487654321 2.378335339506173


In [57]:
df2 = df[['arsId', 'seq']]
df2['exist'] = [1 for _ in range(113)]
df2['new'] = result
df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['exist'] = [1 for _ in range(113)]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['new'] = result


Unnamed: 0,arsId,seq,exist,new
0,8161,1,1,1
1,8163,2,1,1
2,8408,3,1,0
3,8165,4,1,0
4,8167,5,1,1
...,...,...,...,...
108,8166,109,1,0
109,8409,110,1,0
110,8164,111,1,0
111,8162,112,1,1


In [27]:
import pickle

with open(r"/Users/taejin/Desktop/데분 학회/급행버스/api 딕셔너리/getStationByRoute.pickle", 'rb') as file:
    getStationByRoute = pickle.load(file)

In [31]:
data = getStationByRoute['143']
data = data[['arsId', 'gpsX', 'gpsY', 'seq', 'stationNm']]
data[['exist', 'new']] = df2[['exist', 'new']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[['exist', 'new']] = df2[['exist', 'new']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[['exist', 'new']] = df2[['exist', 'new']]


In [33]:
data.to_csv('/Users/taejin/Desktop/데분 학회/급행버스/143번 버스.csv')