<a href="https://colab.research.google.com/github/y0ungminhada/python-ai-modeling/blob/main/Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; the CSV files read by the cells below
# live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **데이터 분석**

In [12]:
# pandas is imported here as well so this cell runs on a fresh kernel; the
# notebook's other `import pandas as pd` lives in a later cell.
import pandas as pd
from scipy.stats import shapiro

average_data_by_region = pd.read_csv('/content/drive/MyDrive/average_data_by_region.csv')
# Exclude the first column (assumed to be non-numeric or an identifier like 'Region')
numeric_columns = average_data_by_region.iloc[:, 1:].columns

# Compute mean and variance (pandas' default `var`) per column, one row per column
statistics = average_data_by_region[numeric_columns].agg(['mean', 'var']).T
statistics.columns = ['Mean', 'Variance']

# Perform Shapiro-Wilk test for normality: a column is labelled normal when the
# test does not reject normality at the 5% level (p > 0.05).
normality_results = {}
for column in numeric_columns:
    values = average_data_by_region[column].dropna()
    if len(values) < 3:
        # shapiro() needs at least three observations; flag instead of crashing.
        normality_results[column] = {'Normality': 'N/A'}
        continue
    stat, p_value = shapiro(values)
    normality_results[column] = {
        'Normality': 'Yes' if p_value > 0.05 else 'No'
    }

# Combine results into a single DataFrame indexed by column name
normality_df = pd.DataFrame(normality_results).T
combined_statistics = pd.concat([statistics, normality_df], axis=1)

combined_statistics.head(50)

Unnamed: 0,Mean,Variance,Normality
Population,42594.583914,154600500.0,Yes
Business Opening Rate,255.126667,11343.23,No
Business Closing Rate,229.666667,8104.123,No
Number of Stores,5808.453333,5163697.0,No
Average Operating Period,2.715889,0.01605792,Yes
Startup Survival Rate (1 Year),75.932,6.428517,Yes
Startup Survival Rate (3 Years),46.753333,10.00023,Yes
Startup Survival Rate (5 Years),30.486,7.934506,Yes
Annual Survival Rate (1 Year),69.918,6.367313,Yes
Annual Survival Rate (3 Years),29.997333,7.616752,Yes


In [41]:
# Specify the columns to filter
# (these are row labels of combined_statistics, which is indexed by column name)
columns_to_view = ['Rent Price', 'Population Density Score', 'Store Density Score',
                   'Subway Station', '한식', '일식', '중식', '양식', '카페']

# Filter the combined_statistics DataFrame to show only the specified columns
# Depends on `combined_statistics` built by the statistics cell above.
filtered_statistics = combined_statistics.loc[columns_to_view]
filtered_statistics.head(10)

Unnamed: 0,Mean,Variance,Normality
Rent Price,118351.846667,1222879000.0,No
Population Density Score,3.0,2.083333,No
Store Density Score,3.0,2.083333,No
Subway Station,10.92,42.07667,Yes
한식,4.0,2.747581,No
일식,4.0,8.860529,No
중식,4.0,4.894266,No
양식,4.0,25.44119,No
카페,4.0,7.139207,No


# **첫번째 Recommendation System**

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the region-level CSV from the mounted Drive; recommend_region below reads
# its 'Region' column, the feature columns and the chosen category column.
average_data_by_region = pd.read_csv('/content/drive/MyDrive/average_data_by_region.csv')

def recommend_region(user_input, data, features, category_column):
    """Rank regions by similarity to the user's preferences and category saturation.

    The score is the equal-weight average of:
      * the cosine similarity between the min-max scaled user vector and each
        region's min-max scaled feature vector, and
      * a category score: 1 minus the min-max scaled value of ``category_column``
        (a lower category value is treated as better).

    Args:
        user_input: Mapping from feature name to the user's desired value; must
            contain every name in ``features``.
        data: DataFrame with a 'Region' column, the feature columns and
            (optionally) ``category_column``. It is not modified.
        features: Names of the columns used for the similarity computation.
        category_column: Name of the column holding the selected category's value.

    Returns:
        A new DataFrame sorted by 'Recommendation Score' (descending) with the
        columns 'Region', 'Recommendation Score', 'Cosine Similarity',
        'Category Score' followed by ``features``. An empty DataFrame is
        returned (after printing a warning) when a feature column is missing.
    """
    feature_list = list(features)

    # Ensure that the required features are in the dataset
    missing_features = [feature for feature in feature_list if feature not in data.columns]
    if missing_features:
        print(f"Warning: Missing columns in the data: {', '.join(missing_features)}")
        return pd.DataFrame()  # Return empty DataFrame if essential columns are missing

    # Work on a copy so the caller's DataFrame does not silently gain score columns.
    scored = data.copy()

    # Normalize the data for consistent scaling
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(scored[feature_list])

    # Put the user input into a frame with the same column names the scaler was
    # fitted on, so the transform is checked against matching feature names.
    user_frame = pd.DataFrame(
        [[user_input[feature] for feature in feature_list]], columns=feature_list
    )
    scaled_user_vector = scaler.transform(user_frame)

    # Calculate cosine similarity between the user input and every region
    scored['Cosine Similarity'] = cosine_similarity(scaled_user_vector, scaled_data).flatten()

    # Process the restaurant category column
    if category_column in scored.columns:
        # Normalize and reverse: the lowest category value gets score 1, the highest 0.
        category_scores = 1 - MinMaxScaler().fit_transform(scored[[category_column]])
        scored['Category Score'] = category_scores.flatten()
    else:
        print(f"Warning: '{category_column}' column is missing in the data.")
        scored['Category Score'] = 0  # Set a default score if the column is missing

    # Final score: equal-weight average of similarity and category score
    scored['Recommendation Score'] = scored['Cosine Similarity'] * 0.5 + scored['Category Score'] * 0.5

    # Sort the regions by their recommendation score in descending order
    recommended = scored.sort_values(by='Recommendation Score', ascending=False)

    return recommended[['Region', 'Recommendation Score', 'Cosine Similarity', 'Category Score'] + feature_list]

# User input handling
# Each value is parsed with int(); non-numeric input raises ValueError.
user_input = {
    'Rent Price': int(input("Enter the Rent Price(81087~244673): ")),  # Example: User enters a rent price within the range
    'Subway Station': int(input("Enter the number of Subway Stations: ")),  # Example: User specifies the number of subway stations
    'Population Density Score': int(input("Enter the Population Density Score(1~5): ")),  # Example: User provides a population density score
    'Store Density Score': int(input("Enter the Store Density Score(1~5): "))  # Example: User provides a store density score
}

# Check available restaurant categories
available_categories = ['한식', '중식', '일식', '양식', '카페']  # Example categories available in the data
selected_category = input(f"Select a restaurant category for recommendations ({', '.join(available_categories)}): ")

# Start from an empty result so the display step below never reads an undefined
# (or stale, left over from a previous run) `recommendations` when the category
# is rejected.
recommendations = pd.DataFrame()

# Validate the selected category
if selected_category not in available_categories:
    print(f"Invalid category. Please choose one from {', '.join(available_categories)}.")
else:
    category_column = selected_category

    # Use the averaged data by region
    data_for_recommendation = average_data_by_region  # Assumes 'average_data_by_region' is preprocessed data

    # Run the recommendation system
    recommendations = recommend_region(user_input, data_for_recommendation,
                                   features=['Rent Price', 'Subway Station', 'Population Density Score', 'Store Density Score'],  # Features used for recommendations
                                   category_column=category_column)

# Display results
if not recommendations.empty:
    print("\nRecommended regions:")
    print(recommendations.head(5))  # Display the top 5 recommended regions
    # Display the top region recommendation
    print("\nTop choice: ")
    print(recommendations.head(1)['Region'])  # Display only the 'Region' of the top recommendation
else:
    print("\nNo recommendations available.")


Enter the Rent Price(81087~244673): 100000
Enter the number of Subway Stations: 3
Enter the Population Density Score(1~5): 4
Enter the Store Density Score(1~5): 3
Select a restaurant category for recommendations (한식, 중식, 일식, 양식, 카페): 중식

Recommended regions:
   Region  Recommendation Score  Cosine Similarity  Category Score  \
14    중랑구              0.967834           0.972534        0.963134   
8     동작구              0.945717           0.970928        0.920507   
9     성동구              0.935956           0.916842        0.955069   
15    양천구              0.923835           0.960573        0.887097   
10   서대문구              0.922568           0.990297        0.854839   

       Rent Price  Subway Station  Population Density Score  \
14   81087.000000             8.0                         4   
8   119571.166667            12.0                         5   
9   133941.833333            14.0                         4   
15  101594.000000             5.0                         4   
10  1



# **Frequent Itemset 찾기**

In [4]:
# Restaurant categories that can appear in a basket
columns_of_interest = ['한식', '일식', '중식', '양식', '카페']

# Per-category mean across all rows
selected_columns = average_data_by_region[columns_of_interest]
column_means = selected_columns.mean()

# One basket per row: the categories whose value is strictly above that
# category's mean. The comparison is done for the whole frame at once and each
# boolean row is then turned into a list of category names.
above_mean = selected_columns.gt(column_means, axis="columns")
baskets = [
    [category for category, is_above in zip(columns_of_interest, flags) if is_above]
    for flags in above_mean.to_numpy()
]

print(baskets)

# A-priori algorithm implementation
# A-priori algorithm: Function to find frequently occurring itemsets
def apriori(baskets, min_support):
    """Find every itemset whose support is at least ``min_support``.

    The support of an itemset is the fraction of baskets that contain all of
    its items. An item that is repeated inside one basket counts once for that
    basket, so support never exceeds 1.

    Args:
        baskets: Iterable of baskets; each basket is an iterable of hashable items.
        min_support: Minimum fraction of baskets an itemset must appear in.

    Returns:
        dict mapping each frequent itemset (``frozenset``) to its support
        (``float``). Itemsets are inserted by increasing size, in a
        deterministic order derived from the first appearance of each item.
        An empty input yields an empty dict.
    """
    # Imported locally so this cell also runs when the cell that imports these
    # names at module level has not been executed yet.
    from collections import defaultdict
    from itertools import combinations

    # De-duplicate items per basket while keeping their first-seen order.
    unique_baskets = [list(dict.fromkeys(basket)) for basket in baskets]
    total_baskets = len(unique_baskets)
    if total_baskets == 0:
        return {}

    # Step 1: count 1-itemsets
    item_counts = defaultdict(int)
    for basket in unique_baskets:
        for item in basket:
            item_counts[frozenset([item])] += 1

    frequent_itemsets = {
        itemset: count / total_baskets
        for itemset, count in item_counts.items()
        if count / total_baskets >= min_support
    }

    # Larger frequent itemsets can only be built from frequent single items.
    frequent_items = [next(iter(itemset)) for itemset in frequent_itemsets]
    basket_sets = [frozenset(basket) for basket in unique_baskets]

    k = 2  # Size of the itemsets evaluated in the current pass
    while True:
        current_frequent_itemsets = {}
        # Step 2: candidates are generated once per pass, not once per basket.
        for combo in combinations(frequent_items, k):
            # Apriori pruning: every (k-1)-subset of a frequent itemset must be frequent.
            if any(frozenset(subset) not in frequent_itemsets
                   for subset in combinations(combo, k - 1)):
                continue
            candidate = frozenset(combo)
            count = sum(1 for basket in basket_sets if candidate <= basket)
            # Itemsets that never occur are not reported, even when min_support is 0.
            if count and count / total_baskets >= min_support:
                current_frequent_itemsets[candidate] = count / total_baskets

        if not current_frequent_itemsets:  # Stop if no new frequent itemsets are found
            break

        frequent_itemsets.update(current_frequent_itemsets)
        k += 1

    return frequent_itemsets

# Execute A-priori algorithm
# min_support=0.2: keep itemsets whose support (count / number of baskets) is >= 0.2
frequent_itemsets = apriori(baskets, min_support=0.2)

# Print results
def frozenset_to_str(fset):
    """Render an itemset as ``{a, b, c}`` for display.

    Items are converted with ``str`` and sorted so the same itemset always
    prints the same way, regardless of set iteration order or item type.
    """
    return '{' + ', '.join(sorted(str(item) for item in fset)) + '}'

# List every frequent itemset with its support rounded to two decimals.
print("\nFrequent Itemsets:")
for itemset, support in frequent_itemsets.items():
    print(f"Itemset: {frozenset_to_str(itemset)}, Support: {support:.2f}")


[['한식', '일식', '중식', '양식', '카페'], ['한식', '일식', '중식', '양식', '카페'], ['카페'], ['한식', '일식', '중식', '양식', '카페'], ['한식', '일식', '중식', '카페'], ['일식', '중식'], ['한식', '양식', '카페'], ['중식'], ['카페'], ['카페'], [], ['한식', '일식', '중식'], ['중식'], ['양식', '카페'], [], [], [], ['한식', '중식'], ['카페'], ['한식', '일식', '중식', '양식'], ['카페'], ['한식', '일식', '중식', '양식'], ['한식'], [], []]

Frequent Itemsets:
Itemset: {한식}, Support: 0.40
Itemset: {일식}, Support: 0.32
Itemset: {중식}, Support: 0.44
Itemset: {양식}, Support: 0.28
Itemset: {카페}, Support: 0.44
Itemset: {중식, 한식}, Support: 0.32
Itemset: {중식, 일식}, Support: 0.32
Itemset: {중식, 양식}, Support: 0.20
Itemset: {한식, 카페}, Support: 0.20
Itemset: {일식, 한식}, Support: 0.28
Itemset: {한식, 양식}, Support: 0.24
Itemset: {양식, 카페}, Support: 0.20
Itemset: {일식, 양식}, Support: 0.20
Itemset: {중식, 일식, 한식}, Support: 0.28
Itemset: {중식, 한식, 양식}, Support: 0.20
Itemset: {중식, 일식, 양식}, Support: 0.20
Itemset: {일식, 한식, 양식}, Support: 0.20
Itemset: {중식, 일식, 한식, 양식}, Support: 0.20


# **Association Rule 구하기**

In [5]:
#Association rule 구하기
def generate_association_rules(frequent_itemsets, min_confidence=0.5):
    """Derive single-consequent association rules from frequent itemsets.

    For every frequent itemset with at least two items, each item in turn is
    used as the consequent and the remaining items as the antecedent.

    Args:
        frequent_itemsets: dict mapping ``frozenset`` itemsets to their support.
        min_confidence: Minimum confidence a rule needs to be kept.

    Returns:
        List of dicts with the keys 'Antecedent' (set), 'Consequent' (set),
        'Support' (support of the full itemset), 'Confidence'
        (support(itemset) / support(antecedent)) and 'Lift'
        (confidence / support(consequent), or 0 when the consequent's support
        is not available in ``frequent_itemsets``).
    """
    # Imported locally so this cell runs on a fresh kernel; the module-level
    # import of `combinations` only appears in a later cell.
    from itertools import combinations

    rules = []
    for itemset, itemset_support in frequent_itemsets.items():
        if len(itemset) < 2:  # Only itemsets with two or more items can form a rule
            continue
        for antecedent in map(frozenset, combinations(itemset, len(itemset) - 1)):
            antecedent_support = frequent_itemsets.get(antecedent, 0)
            if not antecedent_support > 0:
                # Without the antecedent's support the confidence cannot be computed.
                continue
            confidence = itemset_support / antecedent_support
            if confidence < min_confidence:
                continue
            consequent = itemset - antecedent
            # Lift compares the rule's confidence with the consequent's base rate.
            consequent_support = frequent_itemsets.get(consequent, 0)
            lift = confidence / consequent_support if consequent_support > 0 else 0
            rules.append({
                'Antecedent': set(antecedent),
                'Consequent': set(consequent),
                'Support': itemset_support,
                'Confidence': confidence,
                'Lift': lift
            })
    return rules

# Generate the association rules
min_confidence = 0.5  # minimum confidence
association_rules = generate_association_rules(frequent_itemsets, min_confidence)

# Print the results
print("Association Rules:")
for rule in association_rules:
    print(
        f"Rule: {rule['Antecedent']} -> {rule['Consequent']}, "
        f"Support: {rule['Support']:.2f}, "
        f"Confidence: {rule['Confidence']:.2f}, "
        f"Lift: {rule['Lift']:.2f}"
    )
print("Total Rules:", len(association_rules))

# Best association rule per category, keyed by category name
best_rules = {}

# Categories for which a best rule is looked up
categories = ['한식', '중식', '일식', '양식', '카페']

for category in categories:
    # Rules that predict this category (it appears in the consequent) and have
    # a positive confidence.
    matching_rules = [
        rule for rule in association_rules
        if category in rule['Consequent'] and rule['Confidence'] > 0
    ]
    if not matching_rules:
        continue  # No rule leads to this category: nothing is stored

    # max() returns the first rule among equals, so ties keep the earliest rule.
    best_rule = max(matching_rules, key=lambda rule: rule['Confidence'])
    best_rules[category] = {
        'Rule': f"{', '.join(best_rule['Antecedent'])} -> {', '.join(best_rule['Consequent'])}",
        'Support': best_rule['Support'],
        'Confidence': best_rule['Confidence'],
        'Lift': best_rule['Lift']
    }


# Print the results: one entry per category that has a best rule
print("Best Association Rule for Each Category (Where Category is the Consequent):")
for category, rule in best_rules.items():
    print(f"Category: {category}")
    print(f"  Rule: {rule['Rule']}")
    print(f"  Support: {rule['Support']:.2f}")
    print(f"  Confidence: {rule['Confidence']:.2f}")
    print(f"  Lift: {rule['Lift']:.2f}")
    print("-" * 40)

Association Rules:
Rule: {'중식'} -> {'한식'}, Support: 0.32, Confidence: 0.73, Lift: 1.82
Rule: {'한식'} -> {'중식'}, Support: 0.32, Confidence: 0.80, Lift: 1.82
Rule: {'중식'} -> {'일식'}, Support: 0.32, Confidence: 0.73, Lift: 2.27
Rule: {'일식'} -> {'중식'}, Support: 0.32, Confidence: 1.00, Lift: 2.27
Rule: {'양식'} -> {'중식'}, Support: 0.20, Confidence: 0.71, Lift: 1.62
Rule: {'한식'} -> {'카페'}, Support: 0.20, Confidence: 0.50, Lift: 1.14
Rule: {'일식'} -> {'한식'}, Support: 0.28, Confidence: 0.88, Lift: 2.19
Rule: {'한식'} -> {'일식'}, Support: 0.28, Confidence: 0.70, Lift: 2.19
Rule: {'한식'} -> {'양식'}, Support: 0.24, Confidence: 0.60, Lift: 2.14
Rule: {'양식'} -> {'한식'}, Support: 0.24, Confidence: 0.86, Lift: 2.14
Rule: {'양식'} -> {'카페'}, Support: 0.20, Confidence: 0.71, Lift: 1.62
Rule: {'일식'} -> {'양식'}, Support: 0.20, Confidence: 0.62, Lift: 2.23
Rule: {'양식'} -> {'일식'}, Support: 0.20, Confidence: 0.71, Lift: 2.23
Rule: {'중식', '일식'} -> {'한식'}, Support: 0.28, Confidence: 0.88, Lift: 2.19
Rule: {'중식', '한식'} -> {

# **두번째 Recommendation System**
Frequent Itemset, Association rule 포함

In [10]:
import numpy as np
import pandas as pd
from itertools import combinations
from collections import defaultdict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity




# A-priori Algorithm: Function to find frequently occurring itemsets
def apriori(baskets, min_support):
    """Find every itemset whose support is at least ``min_support``.

    The support of an itemset is the fraction of baskets that contain all of
    its items. An item that is repeated inside one basket counts once for that
    basket, so support never exceeds 1.

    Args:
        baskets: Iterable of baskets; each basket is an iterable of hashable items.
        min_support: Minimum fraction of baskets an itemset must appear in.

    Returns:
        dict mapping each frequent itemset (``frozenset``) to its support
        (``float``). Itemsets are inserted by increasing size, in a
        deterministic order derived from the first appearance of each item.
        An empty input yields an empty dict.
    """
    # De-duplicate items per basket while keeping their first-seen order.
    unique_baskets = [list(dict.fromkeys(basket)) for basket in baskets]
    total_baskets = len(unique_baskets)
    if total_baskets == 0:
        return {}

    # Step 1: count 1-itemsets
    item_counts = defaultdict(int)
    for basket in unique_baskets:
        for item in basket:
            item_counts[frozenset([item])] += 1

    frequent_itemsets = {
        itemset: count / total_baskets
        for itemset, count in item_counts.items()
        if count / total_baskets >= min_support
    }

    # Larger frequent itemsets can only be built from frequent single items.
    frequent_items = [next(iter(itemset)) for itemset in frequent_itemsets]
    basket_sets = [frozenset(basket) for basket in unique_baskets]

    k = 2  # Size of the itemsets evaluated in the current pass
    while True:
        current_frequent_itemsets = {}
        # Candidates are generated once per pass, not once per basket.
        for combo in combinations(frequent_items, k):
            # Apriori pruning: every (k-1)-subset of a frequent itemset must be frequent.
            if any(frozenset(subset) not in frequent_itemsets
                   for subset in combinations(combo, k - 1)):
                continue
            candidate = frozenset(combo)
            count = sum(1 for basket in basket_sets if candidate <= basket)
            # Itemsets that never occur are not reported, even when min_support is 0.
            if count and count / total_baskets >= min_support:
                current_frequent_itemsets[candidate] = count / total_baskets

        if not current_frequent_itemsets:
            break

        frequent_itemsets.update(current_frequent_itemsets)
        k += 1

    return frequent_itemsets

# Generate association rules
def generate_association_rules(frequent_itemsets, min_confidence=0.5):
    """Derive single-consequent association rules from frequent itemsets.

    Every frequent itemset with two or more items is split into an antecedent
    (all items but one) and a consequent (the remaining item). A rule is kept
    when support(itemset) / support(antecedent) reaches ``min_confidence``.
    Lift is confidence / support(consequent), or 0 when that support is not
    present in ``frequent_itemsets``.

    Returns a list of dicts with the keys 'Antecedent', 'Consequent',
    'Support', 'Confidence' and 'Lift'.
    """
    found = []
    for full_set, full_support in frequent_itemsets.items():
        if len(full_set) <= 1:
            continue
        left_size = len(full_set) - 1
        for left_items in combinations(full_set, left_size):
            left = frozenset(left_items)
            left_support = frequent_itemsets.get(left, 0)
            if not left_support > 0:
                continue
            conf_value = full_support / left_support
            if not conf_value >= min_confidence:
                continue
            right = full_set - left
            right_support = frequent_itemsets.get(right, 0)
            if right_support > 0:
                lift_value = conf_value / right_support
            else:
                lift_value = 0
            found.append({
                'Antecedent': set(left),
                'Consequent': set(right),
                'Support': full_support,
                'Confidence': conf_value,
                'Lift': lift_value
            })
    return found

# Recommendation system
def recommend_region(user_input, data, features, category_column, association_rules, weight=0.25):
    """Rank regions by feature similarity, category saturation and an association-rule bonus.

    Scores:
      * 'Cosine Similarity': cosine similarity between the min-max scaled user
        vector and each region's min-max scaled feature vector.
      * 'Category Score': ``(1 - weight)`` times the reversed min-max scaled
        value of ``category_column`` (lower raw value scores higher). If
        ``association_rules`` has an entry for ``category_column``, every
        antecedent of that rule that is a column of ``data`` adds
        ``weight * confidence`` to the regions whose value in that column is
        above the column mean.
      * 'Recommendation Score': equal-weight average of the two scores above.

    Args:
        user_input: Mapping from feature name to the user's desired value.
        data: DataFrame with a 'Region' column, the feature columns and the
            category columns. It is not modified.
        features: Names of the columns used for the similarity computation.
        category_column: Name of the selected category column.
        association_rules: dict keyed by category; each value has a 'Rule'
            string formatted as ``"a, b -> c"`` and a 'Confidence' number.
        weight: Share of the category score driven by the association rule.

    Returns:
        A new DataFrame sorted by 'Recommendation Score' (descending) with the
        columns 'Region', 'Recommendation Score', 'Cosine Similarity',
        'Category Score' followed by ``features``; an empty DataFrame (after a
        printed warning) when a feature column is missing.
    """
    feature_list = list(features)

    missing_features = [feature for feature in feature_list if feature not in data.columns]
    if missing_features:
        print(f"Warning: Missing columns in the data: {', '.join(missing_features)}")
        return pd.DataFrame()

    # Work on a copy so the caller's DataFrame does not silently gain score columns.
    scored = data.copy()

    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(scored[feature_list])

    # Same column names as the frame the scaler was fitted on.
    user_frame = pd.DataFrame(
        [[user_input[feature] for feature in feature_list]], columns=feature_list
    )
    scaled_user_vector = scaler.transform(user_frame)

    scored['Cosine Similarity'] = cosine_similarity(scaled_user_vector, scaled_data).flatten()

    if category_column in scored.columns:
        category_scores = 1 - MinMaxScaler().fit_transform(scored[[category_column]])
        scored['Category Score'] = category_scores.flatten() * (1 - weight)

        rule = association_rules.get(category_column)
        if rule is not None:
            # The antecedents are recovered from the left-hand side of the rule text.
            antecedents = rule['Rule'].split(' -> ')[0].split(', ')
            bonus = weight * rule['Confidence']
            for antecedent in antecedents:
                if antecedent in scored.columns:
                    # Boolean mask instead of a per-row loop: same rows, same increment.
                    above_mean = scored[antecedent] > scored[antecedent].mean()
                    scored.loc[above_mean, 'Category Score'] += bonus
    else:
        print(f"Warning: '{category_column}' column is missing in the data.")
        scored['Category Score'] = 0

    scored['Recommendation Score'] = scored['Cosine Similarity'] * 0.5 + scored['Category Score'] * 0.5
    recommended = scored.sort_values(by='Recommendation Score', ascending=False)

    return recommended[['Region', 'Recommendation Score', 'Cosine Similarity', 'Category Score'] + feature_list]


def _build_baskets(data, categories):
    """Build one basket per row: the categories whose value is above that category's column mean."""
    category_means = data[categories].mean()
    return [
        [category for category in categories if row[category] > category_means[category]]
        for _, row in data.iterrows()
    ]


def _select_best_rules(rules, categories):
    """For each category, keep the highest-confidence rule whose consequent contains it."""
    best_rules = {}
    for category in categories:
        best_rule = None
        best_confidence = 0
        for rule in rules:
            # Strict '>' keeps the earliest rule when confidences tie.
            if category in rule['Consequent'] and rule['Confidence'] > best_confidence:
                best_confidence = rule['Confidence']
                best_rule = rule
        if best_rule is not None:
            best_rules[category] = {
                'Rule': f"{', '.join(best_rule['Antecedent'])} -> {', '.join(best_rule['Consequent'])}",
                'Support': best_rule['Support'],
                'Confidence': best_rule['Confidence'],
                'Lift': best_rule['Lift']
            }
    return best_rules


# End-to-end run: mine rules, read the user's preferences, print the ranking
def main():
    """Run the second recommendation system interactively.

    Loads the region CSV, mines frequent itemsets and association rules from
    the restaurant-category columns, asks the user for preferences and a
    category, then prints the five best regions and the top choice.

    The result is stored in the module-level ``recommendations`` so later
    cells can use it. It is reset to an empty DataFrame at the start of every
    run, so an invalid category never leaves it undefined or stale.
    Numeric answers are parsed with ``int()``, which raises ``ValueError`` on
    non-numeric input.
    """
    global recommendations  # Shared with later cells
    recommendations = pd.DataFrame()

    # Data preparation
    average_data_by_region = pd.read_csv('/content/drive/MyDrive/average_data_by_region.csv')  # Load your data

    columns_of_interest = ['한식', '일식', '중식', '양식', '카페']

    baskets = _build_baskets(average_data_by_region, columns_of_interest)
    frequent_itemsets = apriori(baskets, min_support=0.2)
    association_rules = generate_association_rules(frequent_itemsets, min_confidence=0.5)
    best_rules = _select_best_rules(association_rules, columns_of_interest)

    user_input = {
        'Rent Price': int(input("Enter the Rent Price(81087~244673): ")),
        'Subway Station': int(input("Enter the number of Subway Stations: ")),
        'Population Density Score': int(input("Enter the Population Density Score(1~5): ")),
        'Store Density Score': int(input("Enter the Store Density Score(1~5): "))
    }

    available_categories = columns_of_interest
    selected_category = input(f"Select a restaurant category ({', '.join(available_categories)}): ")
    if selected_category not in available_categories:
        print(f"Invalid category. Please choose from {', '.join(available_categories)}.")
        return

    recommendations = recommend_region(user_input, average_data_by_region,
                                       features=['Rent Price', 'Subway Station', 'Population Density Score', 'Store Density Score'],
                                       category_column=selected_category,
                                       association_rules=best_rules)

    if not recommendations.empty:
        print("\nRecommended regions:")
        print(recommendations.head(5))
        print("\nTop choice:")
        print(recommendations.head(1)['Region'])
    else:
        print("\nNo recommendations available.")

if __name__ == '__main__':
    main()


Enter the Rent Price(81087~244673): 100000
Enter the number of Subway Stations: 3
Enter the Population Density Score(1~5): 3
Enter the Store Density Score(1~5): 4
Select a restaurant category (한식, 일식, 중식, 양식, 카페): 중식

Recommended regions:
   Region  Recommendation Score  Cosine Similarity  Category Score  \
5     광진구              0.833671           0.924831        0.742512   
1     마포구              0.825021           0.938637        0.711406   
9     성동구              0.816572           0.916842        0.716302   
14    중랑구              0.810901           0.899451        0.722350   
8     동작구              0.804993           0.919605        0.690380   

       Rent Price  Subway Station  Population Density Score  \
5   101582.000000            11.0                         5   
1   135116.666667            16.0                         4   
9   133941.833333            14.0                         4   
14   81087.000000             8.0                         4   
8   119571.166667        



# **최적의 구에 대한 보고서 생성**

In [6]:
import pandas as pd
import matplotlib.pyplot as plt
from openpyxl import Workbook
from openpyxl.drawing.image import Image
from openpyxl.styles import Alignment, Font, Border, Side

# Define get_region_details function
def get_region_details(region_name, data):
    """Return the rows of ``data`` whose 'Region' equals ``region_name``.

    When no row matches, a message is printed and a new empty DataFrame is
    returned instead.
    """
    is_requested_region = data['Region'] == region_name
    matches = data[is_requested_region]
    if not matches.empty:
        return matches
    print(f"No data found for region: {region_name}")
    return pd.DataFrame()  # Nothing matched

# Function to save the survival graph as an image
def save_graph(region_name, survival_data, image_path='survival_graph.png'):
    """Draw ``survival_data`` as a bar chart and save it to ``image_path``.

    ``region_name`` is accepted but not used: the chart title is a fixed
    string. The figure is closed after saving and the target path is printed.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    survival_data.plot(kind='bar', color='skyblue', edgecolor='black', ax=ax)

    ax.set_title("Survival and Operating Period Details for Top1 Region")
    ax.set_xlabel("Metrics")
    ax.set_ylabel("Values")
    # Slanted, right-aligned tick labels
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    fig.tight_layout()
    fig.savefig(image_path)  # Write the chart to disk
    plt.close(fig)
    print(f"Graph saved as {image_path}")

# Function to style a range of cells
def style_range(sheet, start_row, start_col, end_row, end_col):
    """Center, un-bold and box every cell in the inclusive rectangle of ``sheet``.

    Rows run from ``start_row`` to ``end_row`` and columns from ``start_col``
    to ``end_col`` (1-based, both ends included). Each cell gets a thin black
    border on all four sides.
    """
    edge = Side(border_style="thin", color="000000")
    box = Border(left=edge, right=edge, top=edge, bottom=edge)
    centered = Alignment(horizontal="center", vertical="center")
    regular = Font(bold=False)

    rectangle = sheet.iter_rows(min_row=start_row, max_row=end_row,
                                min_col=start_col, max_col=end_col)
    for cell in (cell for cells_in_row in rectangle for cell in cells_in_row):
        cell.alignment = centered
        cell.font = regular
        cell.border = box

# Function to save analysis results to Excel
def _format_count(value):
    """Format a finite int/float as a comma-grouped integer string; return anything else unchanged."""
    import math

    if isinstance(value, (int, float)) and math.isfinite(value):
        return f"{int(value):,}"
    # NaN / inf (and non-numeric values) are passed through instead of crashing int().
    return value


def save_analysis_with_survival_graph(region_name, data, output_path='region_analysis_with_survival_graph.xlsx', graph_path='survival_graph.png'):
    """Write a one-sheet Excel report for ``region_name`` with an embedded survival chart.

    The sheet contains, top to bottom: a summary section (population, store
    count, rent, densities), a blank spacer row, a restaurant-category section
    (store counts per category) and the bar chart saved by ``save_graph``.
    Only the data rows of each section are bordered and centered.

    Args:
        region_name: Value looked up in the 'Region' column of ``data``.
        data: DataFrame holding the region rows and the columns used below.
        output_path: Path of the Excel workbook to write.
        graph_path: Path where the chart image is saved and read back from.

    Returns:
        None. Prints a message and returns early when the region is not found.
    """
    region_details = get_region_details(region_name, data)

    if region_details.empty:
        print(f"No data available for region: {region_name}")
        return

    # Survival / operating-period metrics of the first matching row, for the chart
    survival_data = region_details[['Average Operating Period',
                                     'Startup Survival Rate (1 Year)', 'Startup Survival Rate (3 Years)',
                                     'Startup Survival Rate (5 Years)', 'Annual Survival Rate (1 Year)',
                                     'Annual Survival Rate (3 Years)', 'Annual Survival Rate (5 Years)']].iloc[0]

    # Save survival graph as an image
    save_graph(region_name, survival_data, graph_path)

    # Prepare summary data for Excel
    summary_table = region_details[['Population', 'Number of Stores', 'Rent Price',
                                     'Population Density', 'Store Density']].iloc[0]
    summary_table_df = pd.DataFrame(summary_table).reset_index()
    summary_table_df.columns = ['항목', '값']  # Korean column labels (item / value)
    summary_table_df['값'] = summary_table_df['값'].apply(_format_count)

    # Prepare restaurant category data for Excel
    category_table = region_details[['한식가게수', '중식가게수', '일식가게수', '양식가게수', '카페가게수']].iloc[0]
    category_table_df = pd.DataFrame(category_table).reset_index()
    category_table_df.columns = ['음식점 유형', '개수']  # Korean column labels (restaurant type / count)
    category_table_df['개수'] = category_table_df['개수'].apply(_format_count)

    # Create an Excel workbook and a single sheet
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "분석 결과"

    # Summary section: header row, then one row per item. Row numbers are read
    # back from the sheet so the styled range always matches the data rows.
    sheet.append(["[요약 정보]"])
    summary_first_row = sheet.max_row + 1
    for row in summary_table_df.itertuples(index=False):
        sheet.append(row)
    summary_last_row = sheet.max_row
    style_range(sheet, summary_first_row, 1, summary_last_row, 2)

    # Spacer row, then the restaurant-category section
    sheet.append([])
    sheet.append(["[음식점 정보]"])
    category_first_row = sheet.max_row + 1
    for row in category_table_df.itertuples(index=False):
        sheet.append(row)
    category_last_row = sheet.max_row
    style_range(sheet, category_first_row, 1, category_last_row, 2)

    # Add the graph to the sheet, leaving one empty row below the data
    img = Image(graph_path)
    img.anchor = f"B{category_last_row + 2}"
    sheet.add_image(img)

    # Save the workbook
    workbook.save(output_path)
    print(f"Analysis results with graph saved to {output_path}")

# Example execution
# Relies on two names left in the kernel by earlier cells: `recommendations`
# (set as a global by main()) and `average_data_by_region`.
if not recommendations.empty:
    top_region = recommendations.iloc[0]['Region']  # Region with the highest recommendation score (first row of the sorted result)
    save_analysis_with_survival_graph(top_region, average_data_by_region)
else:
    print("No recommendations available for analysis.")


Graph saved as survival_graph.png
Analysis results with graph saved to region_analysis_with_survival_graph.xlsx
