# Pandas Practice with Fast Food Data
Author: JAAR

Date: 07/22/2025

In [2]:
# imports
import pandas as pd

In [3]:
# Load the fast food data
df=pd.read_csv('data/US_top_50_fast_foods.csv')

Get the shape and basic information for the dataset

In [4]:
df.shape

(50, 7)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 7 columns):
 #   Column                                            Non-Null Count  Dtype 
---  ------                                            --------------  ----- 
 0   Fast-Food Chains                                  50 non-null     object
 1   U.S. Systemwide Sales (Millions - U.S Dollars)    50 non-null     int64 
 2   Average Sales per Unit (Thousands - U.S Dollars)  50 non-null     int64 
 3   Franchised Stores                                 50 non-null     int64 
 4   Company Stores                                    50 non-null     int64 
 5   2021 Total Units                                  50 non-null     int64 
 6   Total Change in Units from 2020                   50 non-null     int64 
dtypes: int64(6), object(1)
memory usage: 2.9+ KB


What are the data types?

In [6]:
df.dtypes

Fast-Food Chains                                    object
U.S. Systemwide Sales (Millions - U.S Dollars)       int64
Average Sales per Unit (Thousands - U.S Dollars)     int64
Franchised Stores                                    int64
Company Stores                                       int64
2021 Total Units                                     int64
Total Change in Units from 2020                      int64
dtype: object

Are there any null values?

In [7]:
df.isnull().sum().sum()

np.int64(0)

Replace all of the columns spaces with underscores

In [8]:
df.columns=df.columns.str.replace(' ', '_')

For the series containing the fast food chains, change it to just chain

In [9]:
df.rename(columns={'Fast-Food_Chains':'chain'}, inplace=True)

Get a sample of five entries

In [10]:
df.sample(5)

Unnamed: 0,chain,U.S._Systemwide_Sales_(Millions_-_U.S_Dollars),Average_Sales_per_Unit_(Thousands_-_U.S_Dollars),Franchised_Stores,Company_Stores,2021_Total_Units,Total_Change_in_Units_from_2020
33,Papa Murphy’s,809,643,1213,27,1240,-53
35,Popeyes Louisiana Kitchen,4775,1867,2716,41,2754,146
7,Chipotle,7547,2641,0,2966,2966,198
12,Domino's,8641,1317,6185,375,6560,205
9,Culver’s,2489,3099,831,6,837,55


Rename 'Franchised Stores' as'stores_franchised' and 'Company Stores' as 'company_stores'

In [11]:
df.rename(columns={
    'Franchised_Stores':'franchised',
    'Company_Stores':'company_stores'
}, inplace=True)

Order the companies by number of franchises descending and ascending by names

In [12]:
df.sort_values(by=['franchised', 'chain'], ascending=[False, True]).head()

Unnamed: 0,chain,U.S._Systemwide_Sales_(Millions_-_U.S_Dollars),Average_Sales_per_Unit_(Thousands_-_U.S_Dollars),franchised,company_stores,2021_Total_Units,Total_Change_in_Units_from_2020
41,Subway,9350,438,21147,0,21147,-1043
28,McDonald’s,45960,3420,12775,663,13438,244
13,Dunkin',10416,1127,9244,0,9244,161
3,Burger King,10033,1470,7054,51,7105,24
42,Taco Bell,12600,1823,6540,462,7002,203


Retrieve the first three columns

In [13]:
df.iloc[:,0:3].head() # easier since the names for the columns are complicated as fuq

Unnamed: 0,chain,U.S._Systemwide_Sales_(Millions_-_U.S_Dollars),Average_Sales_per_Unit_(Thousands_-_U.S_Dollars)
0,Arby’s,4462,1309
1,Baskin-Robbins,686,296
2,Bojangles,1485,1924
3,Burger King,10033,1470
4,Carl’s Jr.,1560,1400


Get every other row

In [14]:
df.iloc[::2].head() # truncated with head to show that it works

Unnamed: 0,chain,U.S._Systemwide_Sales_(Millions_-_U.S_Dollars),Average_Sales_per_Unit_(Thousands_-_U.S_Dollars),franchised,company_stores,2021_Total_Units,Total_Change_in_Units_from_2020
0,Arby’s,4462,1309,2293,1116,3409,40
2,Bojangles,1485,1924,496,277,773,15
4,Carl’s Jr.,1560,1400,1011,47,1058,-21
6,Chick-fil-A,16700,6100,2650,82,2732,155
8,Church’s Chicken,776,870,731,161,892,-13


What companies have 2616, 831, and 2293 stores franchised?

In [15]:
df.loc[df.franchised.isin([2616, 831, 2293])]

Unnamed: 0,chain,U.S._Systemwide_Sales_(Millions_-_U.S_Dollars),Average_Sales_per_Unit_(Thousands_-_U.S_Dollars),franchised,company_stores,2021_Total_Units,Total_Change_in_Units_from_2020
0,Arby’s,4462,1309,2293,1116,3409,40
9,Culver’s,2489,3099,831,6,837,55
22,Jimmy John’s,2301,866,2616,41,2657,48


Assign the chain column as the index column

In [16]:
df.set_index('chain', inplace=True)

Which company has the most franchised stores?

In [17]:
# Two way of solving this. If we just need the index, if we need a list, or if we need the max
df.franchised.idxmax()

'Subway'

Which company has the highest percentage of franchised stores?

In [18]:
(df.franchised / (df.franchised + df.company_stores)).sort_values(ascending=False).head()

chain
Baskin-Robbins    1.000000
Dunkin'           1.000000
Subway            1.000000
Tim Hortons       1.000000
Dairy Queen       0.999539
dtype: float64

Of the companies with 100% franchising, which has the most stores?

In [19]:
df.loc[df.index.isin(['Subway', 'Dunkin\'', 'Baskin-Robbins', 'Tim Hortons']), 'franchised']

chain
Baskin-Robbins     2317
Dunkin'            9244
Subway            21147
Tim Hortons         637
Name: franchised, dtype: int64

Rename the 2021 units column to store_count, 2020 unit change column to store_count_change, us sustemwide sales to sales_in_millions and average sales per unit to store_sales_thousands

In [20]:
# Maybe also change the mil and store sales at this time?
df.rename(columns={
    'Total_Change_in_Units_from_2020':'store_count_change',
    '2021_Total_Units':'store_count',
    'U.S._Systemwide_Sales_(Millions_-_U.S_Dollars)':'sales_in_millions',
    'Average_Sales_per_Unit_(Thousands_-_U.S_Dollars)':'store_sales_thousands'
}, inplace=True)

Create a column that has bool that reflects positive changes in store counts as True and negative as False

In [21]:
df['positive_store_count']=(df.store_count > 0)

Get both the first and last chains?

In [22]:
df.iloc[[0, -1]]

Unnamed: 0_level_0,sales_in_millions,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Arby’s,4462,1309,2293,1116,3409,40,True
Zaxby’s,2233,2484,761,147,908,3,True


Drop the Total Changes column and store this as a new df

In [23]:
drop_change=df.drop('store_count_change', axis=1)

Create a new column called positive growth where chains that opened stores have a 1 and chains that don't have a 0

In [24]:
df['positive_growth']=df.positive_store_count.astype(int)

Convert systemwide sales to thousands and store the value as a new column then drop the old column

In [25]:
df['sales_in_thousands'] = df.sales_in_millions * 1000

In [26]:
df=df.drop('sales_in_millions', axis=1)

Sort chains by sales per store descending and name ascending

In [27]:
df.sort_values(by=['store_sales_thousands', 'chain'], ascending=[False, True]).head()

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Chick-fil-A,6100,2650,82,2732,155,True,1,16700000
Raising Cane’s,4893,23,544,567,58,True,1,2377000
Krispy Kreme,4000,51,307,358,6,True,1,996000
Shake Shack,3679,25,218,243,38,True,1,777000
Whataburger,3640,131,742,873,29,True,1,3089000


Create a series that truncates the store count. Round store count to 5000 and 500 respectively for companies with more and less than those values

In [28]:
df.store_count.clip(lower=500, upper=5000).sample(10)

chain
El Pollo Loco                              500
Papa Johns                                3164
QDOBA                                      739
Freddy’s Frozen Custard & Steakburgers     500
Tropical Smoothie Cafe                    1039
Hardee’s                                  1734
Jack in the Box                           2218
Wendy's                                   5000
Carl’s Jr.                                1058
Pizza Hut                                 5000
Name: store_count, dtype: int64

Get all the companies with sales above the mean sales

In [29]:
df.loc[df.sales_in_thousands.mean() > df.sales_in_thousands].head()

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arby’s,1309,2293,1116,3409,40,True,1,4462000
Baskin-Robbins,296,2317,0,2317,102,True,1,686000
Bojangles,1924,496,277,773,15,True,1,1485000
Carl’s Jr.,1400,1011,47,1058,-21,True,1,1560000
Checkers/Rally’s,1145,568,266,834,-13,True,1,931000


Create a list of chains that have over 80% franchised and over 80% company owned then sort by Revenue

In [30]:
df.loc[(.8 > (df.franchised / df.store_count)) & (.8 > (df.company_stores / df.store_count))].sort_values(by='sales_in_thousands', ascending=False)

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Starbucks,1200,6497,8953,15450,113,True,1,24300000
Panera Bread,2700,1130,950,2080,-25,True,1,5650000
Arby’s,1309,2293,1116,3409,40,True,1,4462000
Five Guys,3172,911,479,1390,8,True,1,2093000
Bojangles,1924,496,277,773,15,True,1,1485000
El Pollo Loco,2000,292,189,481,1,True,1,973000
Checkers/Rally’s,1145,568,266,834,-13,True,1,931000
Del Taco,1551,306,294,600,4,True,1,931000
QDOBA,1006,406,333,739,2,True,1,835000


Display the chains with more than 3000 (thousands) in sales and a store count of at least 500

In [31]:
df[((df.store_sales_thousands > 3000) & (df.store_count > 500))]

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Chick-fil-A,6100,2650,82,2732,155,True,1,16700000
Culver’s,3099,831,6,837,55,True,1,2489000
Five Guys,3172,911,479,1390,8,True,1,2093000
McDonald’s,3420,12775,663,13438,244,True,1,45960000
Raising Cane’s,4893,23,544,567,58,True,1,2377000
Whataburger,3640,131,742,873,29,True,1,3089000


Find the companies that are in the 80% for sales and 80% for store counts

In [32]:
df[((df.sales_in_thousands > df.sales_in_thousands.quantile(.8)) &
 (df.store_count > df.store_count.quantile(.8)))]

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Burger King,1470,7054,51,7105,24,True,1,10033000
Domino's,1317,6185,375,6560,205,True,1,8641000
Dunkin',1127,9244,0,9244,161,True,1,10416000
McDonald’s,3420,12775,663,13438,244,True,1,45960000
Starbucks,1200,6497,8953,15450,113,True,1,24300000
Subway,438,21147,0,21147,-1043,True,1,9350000
Taco Bell,1823,6540,462,7002,203,True,1,12600000
Wendy's,1895,5535,403,5938,57,True,1,11111000


In [33]:
# df['artist_count']=(df['artists'].str.count(',').fillna(-1).astype(int) + 1)

In [None]:
# df.dropna().drop([...]).rename([...]).sort_values().head() CHAINING EXAMPLE
# df.drop(df.loc[df["Revenue"] < 80_000].index).sort_values(by='Revenue')
# df.drop(df.loc[df.Revenue < df.Revenue.mean()].index)
# df_usa_only = df.drop(df.loc[(~df['Is American?'])].index) is the same as df.loc[df['Is America']]
# removing columns by dropping them and then also renaming columns are things worth looking into

# datetime accessor with at .dt.year .dt.month
# df["Birthday"] = df['Birth Date'].dt.strftime("%m-%d")


Replace Multiples

In [35]:
# FIND A WAY TO TEST REPLACE, MULTIPLES
# df.key.replace({
#     0:'C', 1:'C#', 2:'D', 3:'D#', 4:'E', 5:'F', 6:'F#', 7:'G',
#     8:'G#', 9:'A', 10:'A#', 11:'B'
# }, inplace=True
# )

## Questions created by ChatGPT from easy to difficult
Following is a list of 30 questions produced by ChatGPT to practice my understanding of Pandas

1.	Get the Series of fast-food chain names.

In [36]:
pd.Series(df.index)

0                                     Arby’s
1                             Baskin-Robbins
2                                  Bojangles
3                                Burger King
4                                 Carl’s Jr.
5                           Checkers/Rally’s
6                                Chick-fil-A
7                                   Chipotle
8                           Church’s Chicken
9                                   Culver’s
10                               Dairy Queen
11                                  Del Taco
12                                  Domino's
13                                   Dunkin'
14                             El Pollo Loco
15                            Firehouse Subs
16                                 Five Guys
17    Freddy’s Frozen Custard & Steakburgers
18                                  Hardee’s
19                           In-N-Out Burger
20                           Jack in the Box
21                             Jersey Mike’s
22        

2.	Count the number of null values in each column.

In [37]:
df.isnull().sum()

store_sales_thousands    0
franchised               0
company_stores           0
store_count              0
store_count_change       0
positive_store_count     0
positive_growth          0
sales_in_thousands       0
dtype: int64

In [38]:
# Sum of all nulled values
df.isnull().sum().sum()

np.int64(0)

3.	Get the Series of total U.S. sales.

In [39]:
total_sales=df.sales_in_thousands

In [40]:
total_sales.describe()

count    5.000000e+01
mean     4.965060e+06
std      7.531439e+06
min      6.150000e+05
25%      9.352500e+05
50%      2.289500e+06
75%      5.400000e+06
max      4.596000e+07
Name: sales_in_thousands, dtype: float64

4.	Find the maximum sales value.

In [41]:
total_sales.max()

np.int64(45960000)

5.	Find the chain with the minimum number of store counts.

In [42]:
df.store_count.min()

np.int64(243)

6.	Get a Series showing whether each chain is American.

7.	Get a boolean Series with sales greater than five billion?

In [43]:
over_5_b_sales=df[df.sales_in_thousands > 5000000]

8.	Count how many companies sell over 5 billion.

In [44]:
over_5_b_sales.shape

(14, 8)

9.	Drop the five franchises with the lowest overall sales

In [45]:
lowest_franchises=df.sort_values(by='sales_in_thousands').head().index

In [46]:
df.drop(lowest_franchises, inplace=True)

10.	Find the average number of stores.

In [47]:
df.store_count.mean()

np.float64(3421.9555555555557)

In [48]:
df.store_count.describe()

count       45.000000
mean      3421.955556
std       4268.291365
min        243.000000
25%        873.000000
50%       2080.000000
75%       3953.000000
max      21147.000000
Name: store_count, dtype: float64

11.	Get all chain names with sales over $5B (as Series).

In [49]:
df.columns

Index(['store_sales_thousands', 'franchised', 'company_stores', 'store_count',
       'store_count_change', 'positive_store_count', 'positive_growth',
       'sales_in_thousands'],
      dtype='object')

12.	Sort the Series of total stores and franchises in descending and ascending orders respectively.

In [50]:
df.sort_values(by=['store_count','franchised'],
               ascending=[True, True]).head()

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Shake Shack,3679,25,218,243,38,True,1,777000
Krispy Kreme,4000,51,307,358,6,True,1,996000
In-N-Out Burger,3200,0,370,370,5,True,1,1175000
El Pollo Loco,2000,292,189,481,1,True,1,973000
McAlister’s Deli,1866,472,33,505,24,True,1,869000


13.	Create a Series of lowercase chain names.

In [51]:
df.index.str.lower()

Index(['arby’s', 'bojangles', 'burger king', 'carl’s jr.', 'checkers/rally’s',
       'chick-fil-a', 'chipotle', 'church’s chicken', 'culver’s',
       'dairy queen', 'del taco', 'domino's', 'dunkin'', 'el pollo loco',
       'firehouse subs', 'five guys', 'hardee’s', 'in-n-out burger',
       'jack in the box', 'jersey mike’s', 'jimmy john’s', 'kfc',
       'krispy kreme', 'little caesars', 'marco’s pizza', 'mcalister’s deli',
       'mcdonald’s', 'panda express', 'panera bread', 'papa johns',
       'papa murphy’s', 'pizza hut', 'popeyes louisiana kitchen', 'qdoba',
       'raising cane’s', 'shake shack', 'sonic drive-in', 'starbucks',
       'subway', 'taco bell', 'tropical smoothie cafe', 'wendy's',
       'whataburger', 'wingstop', 'zaxby’s'],
      dtype='object', name='chain')

14.	Get chains that are not American.

15.	Create a Series showing sales per unit.

In [52]:
df.store_sales_thousands.head(5)

chain
Arby’s              1309
Bojangles           1924
Burger King         1470
Carl’s Jr.          1400
Checkers/Rally’s    1145
Name: store_sales_thousands, dtype: int64

16.	Check which chains have "Pizza" in their name.

In [53]:
df[df.index.str.contains('Pizza', case=False)]

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Marco’s Pizza,934,957,45,1002,48,True,1,899000
Pizza Hut,1022,6526,22,6548,-13,True,1,5500000


17.	Get the 5 smallest chains by number of stores.

In [54]:
df.sort_values(by='store_count').head()

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Shake Shack,3679,25,218,243,38,True,1,777000
Krispy Kreme,4000,51,307,358,6,True,1,996000
In-N-Out Burger,3200,0,370,370,5,True,1,1175000
El Pollo Loco,2000,292,189,481,1,True,1,973000
McAlister’s Deli,1866,472,33,505,24,True,1,869000


18.	Rename the column "U.S. Systemwide Sales (Millions)" to lowercase using .rename().

19.	Find the chain with the second highest sales.

In [55]:
df.sort_values(by='sales_in_thousands', ascending=False).head(3)

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
McDonald’s,3420,12775,663,13438,244,True,1,45960000
Starbucks,1200,6497,8953,15450,113,True,1,24300000
Chick-fil-A,6100,2650,82,2732,155,True,1,16700000


In [56]:
# A great way to do this that I hadn't learned yet. Try to apply it
df.nlargest(2, 'sales_in_thousands').iloc[1]

store_sales_thousands        1200
franchised                   6497
company_stores               8953
store_count                 15450
store_count_change            113
positive_store_count         True
positive_growth                 1
sales_in_thousands       24300000
Name: Starbucks, dtype: object

20.	Create a Series: is chain name longer than 10 characters?

In [57]:
pd.Series(df.index.str.len() > 10).head()

0    False
1    False
2     True
3    False
4     True
dtype: bool

21.	Create a Series showing sales rank (1 = highest) and convert the result to integers.


In [66]:
rank_sales=df.sales_in_thousands.rank(ascending=False).astype(int)

In [67]:
rank_sales.idxmin()

'McDonald’s'

22.	Get the average sales for chains with between 1000 and 3000 stores

In [72]:
df.loc[df.store_count.between(1000, 3000), 'store_count'].mean()

np.float64(1873.625)

23.	Create a Series of chain names sorted by store sales.

In [76]:
df.sort_values(by='store_sales_thousands').head()

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Subway,438,21147,0,21147,-1043,True,1,9350000
Papa Murphy’s,643,1213,27,1240,-53,True,1,809000
Jimmy John’s,866,2616,41,2657,48,True,1,2301000
Church’s Chicken,870,731,161,892,-13,True,1,776000
Firehouse Subs,909,1101,39,1140,9,True,1,1044000


24.	Count how many chains contain "Burger" or "Chicken" in the name.


In [78]:
df.index.str.contains('Burger|Chicken', case=False).sum()

np.int64(4)

25.	Normalize the sales column (0–1 range).

In [None]:
# Normalizing 

26.	Create a Series flagging "giants" (sales > 5000 & total units > 10000).

In [84]:
df[
    (df.sales_in_thousands > 5000) &
    (df.store_count > 10000)
]

Unnamed: 0_level_0,store_sales_thousands,franchised,company_stores,store_count,store_count_change,positive_store_count,positive_growth,sales_in_thousands
chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
McDonald’s,3420,12775,663,13438,244,True,1,45960000
Starbucks,1200,6497,8953,15450,113,True,1,24300000
Subway,438,21147,0,21147,-1043,True,1,9350000


27.	Create a Series of first letters of chain names.

In [85]:
df.index.str[0]

Index(['A', 'B', 'B', 'C', 'C', 'C', 'C', 'C', 'C', 'D', 'D', 'D', 'D', 'E',
       'F', 'F', 'H', 'I', 'J', 'J', 'J', 'K', 'K', 'L', 'M', 'M', 'M', 'P',
       'P', 'P', 'P', 'P', 'P', 'Q', 'R', 'S', 'S', 'S', 'S', 'T', 'T', 'W',
       'W', 'W', 'Z'],
      dtype='object', name='chain')

28.	Replace all spaces in chain names with underscores.

In [86]:
df.index.str.replace(' ', '_')

Index(['Arby’s', 'Bojangles', 'Burger_King', 'Carl’s_Jr.', 'Checkers/Rally’s',
       'Chick-fil-A', 'Chipotle', 'Church’s_Chicken', 'Culver’s',
       'Dairy_Queen', 'Del_Taco', 'Domino's', 'Dunkin'', 'El_Pollo_Loco',
       'Firehouse_Subs', 'Five_Guys', 'Hardee’s', 'In-N-Out_Burger',
       'Jack_in_the_Box', 'Jersey_Mike’s', 'Jimmy_John’s', 'KFC',
       'Krispy_Kreme', 'Little_Caesars', 'Marco’s_Pizza', 'McAlister’s_Deli',
       'McDonald’s', 'Panda_Express', 'Panera_Bread', 'Papa_Johns',
       'Papa_Murphy’s', 'Pizza_Hut', 'Popeyes_Louisiana_Kitchen', 'QDOBA',
       'Raising_Cane’s', 'Shake_Shack', 'Sonic_Drive-In', 'Starbucks',
       'Subway', 'Taco_Bell', 'Tropical_Smoothie_Cafe', 'Wendy's',
       'Whataburger', 'Wingstop', 'Zaxby’s'],
      dtype='object', name='chain')

29.	Bucket sales into categories: Low (<1000), Medium (1000–5000), High (>5000)

30.	Get a Series of boolean values: does chain name start with "M"?

In [88]:
df.index.str.startswith('M')

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False,  True,  True,  True,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False])