<a href="https://colab.research.google.com/github/MalikaSh23/MalikaSh23/blob/main/virtual_brands_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import plotly.express as px

# Load the cleaned virtual-brand export into a DataFrame.
df = pd.read_csv('/content/top-virtual-brand-analysis-clean.csv')

# One row per state, with the number of dataset rows (restaurants) in it.
rows_per_state = df.groupby("STATECODE").size()
state_counts = rows_per_state.rename('count').reset_index()

# Treemap in which each tile is a state sized by its restaurant count.
fig = px.treemap(
    state_counts,
    path=['STATECODE'],
    values='count',
    title="Treemap of Restaurant Counts by State",
)

# Print both the state code and its count inside every tile.
fig.update_traces(textinfo="label+value")

# Render the figure.
fig.show()

In [4]:
# Preview the first rows of the loaded data to sanity-check column names and values.
df.head()

Unnamed: 0,RESTAURANT_NAME,TYPE_OF_RESTAURANT,CITY,STATECODE,ZIP,RUN_ID,Check top or not,VIRTUAL_BRAND_ID,Food Specialty Type,Food Specialty,Cuisine
0,SCOOPED COOKIE DOUGH BAR,"Ice Cream and Frozen Yogurt, Desserts",SANTA ANA,CA,92707,Aug2024,286,VB00214,Desserts,Cookie Dough,American
1,SCOOPED COOKIE DOUGH BAR,"Ice Cream and Frozen Yogurt, Desserts",SAN FRANCISCO,CA,94107,Aug2024,286,VB00215,Desserts,Cookie Dough,American
2,SCOOPED COOKIE DOUGH BAR,"Ice Cream and Frozen Yogurt, Desserts",LONG BEACH,CA,90813,Aug2024,286,VB00216,Desserts,Cookie Dough,American
3,SCOOPED COOKIE DOUGH BAR,"Ice Cream and Frozen Yogurt, Desserts",AUSTIN,TX,78751,Aug2024,286,VB00217,Desserts,Cookie Dough,American
4,SCOOPED COOKIE DOUGH BAR,"Ice Cream and Frozen Yogurt, Desserts",COSTA MESA,CA,92627,Aug2024,286,VB00218,Desserts,Cookie Dough,American


In [5]:
# Summarise column dtypes and non-null counts (useful for spotting missing values).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18460 entries, 0 to 18459
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   RESTAURANT_NAME      18460 non-null  object
 1   TYPE_OF_RESTAURANT   18460 non-null  object
 2   CITY                 18460 non-null  object
 3   STATECODE            18460 non-null  object
 4   ZIP                  18460 non-null  int64 
 5   RUN_ID               18460 non-null  object
 6   Check top or not     18460 non-null  int64 
 7   VIRTUAL_BRAND_ID     18460 non-null  object
 8   Food Specialty Type  18460 non-null  object
 9   Food Specialty       18382 non-null  object
 10  Cuisine              18460 non-null  object
dtypes: int64(2), object(9)
memory usage: 1.5+ MB


In [8]:
import pandas as pd
import plotly.express as px

# Restrict the analysis to the four states of interest: CA, FL, TX, and NY.
filtered_df = df[df['STATECODE'].isin(['CA', 'FL', 'TX', 'NY'])]

# Count restaurants per (state, food specialty) pair.
# groupby drops rows whose 'Food Specialty' is missing, so those are not counted.
cuisine_counts = (filtered_df
                  .groupby(['STATECODE', 'Food Specialty'])
                  .size()
                  .reset_index(name='count'))

# Keep the 10 most common food specialties in each state.
# GroupBy.head is used instead of GroupBy.apply(lambda g: g.head(10)): apply on
# the grouping column is deprecated since pandas 2.2, and head keeps the
# 'STATECODE' column (needed for colouring/faceting below) in every version.
top_cuisines = (cuisine_counts
                .sort_values(['STATECODE', 'count'], ascending=[True, False])
                .groupby('STATECODE')
                .head(10)
                .reset_index(drop=True))

# Largest bar across all states, used to give every facet the same Y range.
max_count = top_cuisines['count'].max()

# One bar-chart facet per state. The title is set once here; it names
# "Food Specialties" because that is the column being plotted (the dataset
# has a separate 'Cuisine' column that this chart does not use).
fig = px.bar(top_cuisines,
             x='Food Specialty',
             y='count',
             color='STATECODE',
             facet_col='STATECODE',
             title="Top 10 Food Specialties in CA, FL, TX, and NY",
             labels={'count': 'Number of Restaurants',
                     'Food Specialty': 'Food Specialty'},
             height=600)

# Hide the legend (the facet headers already name the state), centre the
# title, and tighten the margins.
fig.update_layout(showlegend=False,
                  title={'x': 0.5},
                  margin=dict(t=50, l=50, r=50, b=50),
                  yaxis_title='Number of Restaurants')

# Set the same Y-axis range for all facets so bar heights are comparable.
fig.update_yaxes(range=[0, max_count])

# Show the plot
fig.show()



In [None]:
# NOTE(review): leftover scratch cell — a bare literal that only echoes 6; candidate for deletion.
6

In [None]:
# Look up Rhode Island in the top-specialty table (empty when RI is not among the selected states).
top_cuisines.loc[top_cuisines['STATECODE'].eq('RI')]

Unnamed: 0,STATECODE,Food Specialty,count


In [11]:
import pandas as pd
import plotly.express as px

# Count how many locations each brand has in each state.
restaurant_counts = (df.groupby(['STATECODE', 'RESTAURANT_NAME'])
                     .size()
                     .reset_index(name='location_count'))

# Order brands inside every state from most to fewest locations. The brand
# name is a final tie-breaker so the per-state winner is deterministic.
sorted_restaurant_counts = restaurant_counts.sort_values(
    ['STATECODE', 'location_count', 'RESTAURANT_NAME'],
    ascending=[True, False, True])

# Biggest player (brand with the most locations) in each state: one row per state.
biggest_player_per_state = (sorted_restaurant_counts
                            .groupby('STATECODE')
                            .head(1)
                            .reset_index(drop=True))

# Pick the 7 leading brands. A brand can be the biggest player in several
# states, so its per-state rows are summed first; taking nlargest directly on
# the per-state rows could select the same brand more than once and leave
# fewer than seven distinct slices. The ranking uses the same total that the
# pie displays. Columns: RESTAURANT_NAME, location_count.
top_7_restaurants = (biggest_player_per_state
                     .groupby('RESTAURANT_NAME', as_index=False)['location_count']
                     .sum()
                     .nlargest(7, 'location_count'))

# Label each state's winner with its own name if it is a top-7 brand,
# otherwise lump it into 'OTHERS' (vectorised instead of a row-wise apply).
is_top_7 = biggest_player_per_state['RESTAURANT_NAME'].isin(
    top_7_restaurants['RESTAURANT_NAME'])
biggest_player_per_state['Category'] = (
    biggest_player_per_state['RESTAURANT_NAME'].where(is_top_7, 'OTHERS'))

# Total locations per category: the values shown in the pie chart.
pie_data = (biggest_player_per_state.groupby('Category')['location_count']
            .sum()
            .reset_index())

# Put 'OTHERS' last. It is only appended when it actually exists, so the
# reindex never introduces an empty NaN row.
categories = [cat for cat in pie_data['Category'] if cat != 'OTHERS']
if (pie_data['Category'] == 'OTHERS').any():
    categories.append('OTHERS')
pie_data = pie_data.set_index('Category').reindex(categories).reset_index()

# Donut chart of the leading brands versus everything else.
fig = px.pie(pie_data,
             values='location_count',
             names='Category',
             title='Top 7 Biggest Virtual Brand Players vs. Other',
             hole=0.5)

# Show the name, share, and absolute count on every slice.
fig.update_traces(textinfo='label+percent+value',
                  textfont_size=8)

# Place a vertical legend to the right of the chart, in reversed item order.
fig.update_layout(
    legend_title_text='Restaurant Category',
    legend=dict(
        orientation="v",
        yanchor="top",
        y=0.5,
        xanchor="left",
        x=1.05,
        traceorder='reversed'  # Reverse the order so 'OTHERS' is at the bottom
    )
)

# Show the pie chart
fig.show()

In [None]:
import pandas as pd
import plotly.express as px

# Relies on `state_counts` (STATECODE / count columns) already being defined.

# US state-level choropleth shaded by the number of restaurants per state.
fig = px.choropleth(
    state_counts,
    locations='STATECODE',
    locationmode='USA-states',
    scope='usa',
    color='count',
    color_continuous_scale='Blues',
    title='Number of Virtual Restaurants by State',
    labels={'count': 'Number of Virtual Restaurants'},
)

# Cosmetic layout tweaks: a larger frameless map, a centred title, and a slim
# colour bar placed at mid-height.
fig.update_layout(
    title={'x': 0.5},
    margin=dict(r=0, t=50, l=0, b=0),  # only leave room for the title
    geo={
        'bgcolor': 'rgba(0,0,0,0)',   # transparent map background
        'showframe': False,           # no border around the map
        'projection_scale': 1.1,      # zoom in slightly
    },
    coloraxis_colorbar={
        'title': "Number of Virtual Restaurants",
        'thicknessmode': "pixels",    # thickness is given in pixels
        'thickness': 15,
        'lenmode': "fraction",        # length is a fraction of the plot height
        'len': 0.5,
        'yanchor': "middle",          # anchor the bar by its centre
        'y': 0.5,
    },
)

# Render the map.
fig.show()

In [None]:
# Load the summary export that carries the extra 'Remark' column.
df_virtual = pd.read_csv(
    '/content/Summary of Anlaysis of Virtual Brands - analysis data.csv'
)

In [None]:
# Summarise column dtypes and non-null counts of the summary export.
df_virtual.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18460 entries, 0 to 18459
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   RESTAURANT_NAME      18460 non-null  object
 1   TYPE_OF_RESTAURANT   18460 non-null  object
 2   CITY                 18460 non-null  object
 3   STATECODE            18460 non-null  object
 4   ZIP                  18460 non-null  int64 
 5   RUN_ID               18460 non-null  object
 6   COUNT OF OUTLETS     18460 non-null  int64 
 7   VIRTUAL_BRAND_ID     18460 non-null  object
 8   Food Specialty Type  18460 non-null  object
 9   Food Specialty       18382 non-null  object
 10  Cuisine              18460 non-null  object
 11  Remark               18460 non-null  object
dtypes: int64(2), object(10)
memory usage: 1.7+ MB


In [None]:
# Share of each Remark category across the whole dataset, as a two-column
# frame: 'Remark' and 'Proportion'.
remark_shares = df_virtual['Remark'].value_counts(normalize=True)
us_proportions = remark_shares.rename_axis('Remark').reset_index(name='Proportion')

# Donut chart of those shares.
fig_us = px.pie(
    us_proportions,
    names='Remark',
    values='Proportion',
    hole=0.3,
    title='Virtual only or not?',
)

# Render the figure.
fig_us.show()


In [None]:
# Number of restaurants per (state, Remark) pair.
state_remark_counts = df_virtual.groupby(['STATECODE', 'Remark']).size().reset_index(name='Count')

# Total number of restaurants per state (the denominator for the shares).
state_total_counts = df_virtual.groupby('STATECODE').size().reset_index(name='Total')

# Attach each state's total to its Remark rows, then derive the within-state
# share. assign() builds the final frame in one expression instead of mutating
# the merged frame afterwards.
state_proportions = (
    pd.merge(state_remark_counts, state_total_counts, on='STATECODE')
    .assign(Proportion=lambda merged: merged['Count'] / merged['Total'])
)

# Preview the result. A bare last expression gives the notebook's rich table
# rendering, which print() would flatten to plain text.
state_proportions.head()


  STATECODE                Remark  Count  Total  Proportion
0        AK          Virtual Only      1      7    0.142857
1        AK  Virtual and Physical      6      7    0.857143
2        AL          Virtual Only     12     92    0.130435
3        AL  Virtual and Physical     80     92    0.869565
4        AR          Virtual Only      7    123    0.056911


In [None]:
# Small-multiples bar chart: one panel per state showing the share of each
# Remark category, laid out five panels per row.
fig_state = px.bar(
    state_proportions,
    x='Remark',
    y='Proportion',
    color='Remark',
    facet_col='STATECODE',
    facet_col_wrap=5,
    height=1000,
    title='Proportion of Restaurants by Remark in Each State',
)

# The x-axis already names the categories, so the legend is hidden.
fig_state.update_layout(showlegend=False)

# Render the figure.
fig_state.show()