In [1]:
import numpy as np
import pandas as pd

In [3]:
tickers = ["AAPL", "META", "MSFT", None, "TSLA"]
lower_tickers = []

# A plain Python loop is slower than a vectorized operation and, more
# importantly, cannot cope with the None entry: None has no .lower() method.
# The failure is caught and printed so the demonstration stays visible
# without halting a "Restart & Run All" of the notebook.
try:
    for ticker in tickers:
        lower_tickers.append(ticker.lower())  # raises AttributeError on None
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'NoneType' object has no attribute 'lower'

In [7]:
# Wrapping the list in a Series exposes the vectorized .str accessor,
# which passes the missing entry through instead of raising.
p_tickers = pd.Series(data=tickers)
p_tickers.str.lower()

0    aapl
1    meta
2    msft
3    None
4    tsla
dtype: object

In [11]:
# Load the constituents-financials CSV and preview its first rows.
csv_path = 'files/constituents-financials_csv.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Symbol,Name,Sector,Price,Price/Earnings,Dividend Yield,Earnings/Share,52 Week Low,52 Week High,Market Cap,EBITDA,Price/Sales,Price/Book,SEC Filings
0,MMM,3M Company,Industrials,222.89,24.31,2.332862,7.92,259.77,175.49,138721055226,9048000000.0,4.390271,11.34,http://www.sec.gov/cgi-bin/browse-edgar?action...
1,AOS,A.O. Smith Corp,Industrials,60.24,27.76,1.147959,1.7,68.39,48.925,10783419933,601000000.0,3.575483,6.35,http://www.sec.gov/cgi-bin/browse-edgar?action...
2,ABT,Abbott Laboratories,Health Care,56.27,22.51,1.908982,0.26,64.6,42.28,102121042306,5744000000.0,3.74048,3.19,http://www.sec.gov/cgi-bin/browse-edgar?action...
3,ABBV,AbbVie Inc.,Health Care,108.48,19.41,2.49956,3.29,125.86,60.05,181386347059,10310000000.0,6.291571,26.14,http://www.sec.gov/cgi-bin/browse-edgar?action...
4,ACN,Accenture plc,Information Technology,150.51,25.47,1.71447,5.44,162.6,114.82,98765855553,5643228000.0,2.604117,10.62,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [13]:
# Print a summary of the frame: index range, per-column non-null counts
# and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 505 entries, 0 to 504
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Symbol          505 non-null    object 
 1   Name            505 non-null    object 
 2   Sector          505 non-null    object 
 3   Price           505 non-null    float64
 4   Price/Earnings  503 non-null    float64
 5   Dividend Yield  505 non-null    float64
 6   Earnings/Share  505 non-null    float64
 7   52 Week Low     505 non-null    float64
 8   52 Week High    505 non-null    float64
 9   Market Cap      505 non-null    int64  
 10  EBITDA          505 non-null    float64
 11  Price/Sales     505 non-null    float64
 12  Price/Book      497 non-null    float64
 13  SEC Filings     505 non-null    object 
dtypes: float64(9), int64(1), object(4)
memory usage: 55.4+ KB


In [17]:
# Keep only the text columns (object or string dtype) and preview them.
df_strings = df.select_dtypes(include=["object", "string"])
df_strings.head()

Unnamed: 0,Symbol,Name,Sector,SEC Filings
0,MMM,3M Company,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
1,AOS,A.O. Smith Corp,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
2,ABT,Abbott Laboratories,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
3,ABBV,AbbVie Inc.,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
4,ACN,Accenture plc,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [21]:
# Lower-case every symbol with the vectorized string accessor.
symbols = df_strings["Symbol"]
symbols.str.lower()

0       mmm
1       aos
2       abt
3      abbv
4       acn
       ... 
500     xyl
501     yum
502     zbh
503    zion
504     zts
Name: Symbol, Length: 505, dtype: object

In [23]:
# Upper-case every symbol.
symbols = df_strings["Symbol"]
symbols.str.upper()

0       MMM
1       AOS
2       ABT
3      ABBV
4       ACN
       ... 
500     XYL
501     YUM
502     ZBH
503    ZION
504     ZTS
Name: Symbol, Length: 505, dtype: object

In [25]:
# Capitalize: first character upper-cased, the rest lower-cased.
symbols = df_strings["Symbol"]
symbols.str.capitalize()

0       Mmm
1       Aos
2       Abt
3      Abbv
4       Acn
       ... 
500     Xyl
501     Yum
502     Zbh
503    Zion
504     Zts
Name: Symbol, Length: 505, dtype: object

In [27]:
# Title-case each symbol (first letter of every word upper-cased). The
# symbols displayed are single tokens, so the visible rows look the same
# as the capitalize() result.
symbols = df_strings["Symbol"]
symbols.str.title()

0       Mmm
1       Aos
2       Abt
3      Abbv
4       Acn
       ... 
500     Xyl
501     Yum
502     Zbh
503    Zion
504     Zts
Name: Symbol, Length: 505, dtype: object

In [29]:
# Number of characters in each symbol.
symbols = df_strings["Symbol"]
symbols.str.len()

0      3
1      3
2      3
3      4
4      3
      ..
500    3
501    3
502    3
503    4
504    3
Name: Symbol, Length: 505, dtype: int64

In [35]:
# Rows whose company name is longer than 30 characters.
name_lengths = df_strings["Name"].str.len()
df_strings[name_lengths > 30]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
19,ARE,Alexandria Real Estate Equities Inc,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
35,AIG,"American International Group, Inc.",Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...
37,AWK,American Water Works Company Inc,Utilities,http://www.sec.gov/cgi-bin/browse-edgar?action...
50,AIV,Apartment Investment & Management,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
132,CCI,Crown Castle International Corp.,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
189,FRT,Federal Realty Investment Trust,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
191,FIS,Fidelity National Information Services,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
249,IBM,International Business Machines,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
277,LH,Laboratory Corp. of America Holding,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
351,PKG,Packaging Corporation of America,Materials,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [37]:
# Trim leading and trailing whitespace from every company name.
company_names = df_strings["Name"]
company_names.str.strip()

0                  3M Company
1             A.O. Smith Corp
2         Abbott Laboratories
3                 AbbVie Inc.
4               Accenture plc
                ...          
500                Xylem Inc.
501           Yum! Brands Inc
502    Zimmer Biomet Holdings
503             Zions Bancorp
504                    Zoetis
Name: Name, Length: 505, dtype: object

In [41]:
# Same length filter, but measured after trimming surrounding whitespace
# so stray padding cannot inflate the character count.
trimmed_lengths = df_strings["Name"].str.strip().str.len()
df_strings[trimmed_lengths > 30]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
19,ARE,Alexandria Real Estate Equities Inc,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
35,AIG,"American International Group, Inc.",Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...
37,AWK,American Water Works Company Inc,Utilities,http://www.sec.gov/cgi-bin/browse-edgar?action...
50,AIV,Apartment Investment & Management,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
132,CCI,Crown Castle International Corp.,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
189,FRT,Federal Realty Investment Trust,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
191,FIS,Fidelity National Information Services,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
249,IBM,International Business Machines,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
277,LH,Laboratory Corp. of America Holding,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
351,PKG,Packaging Corporation of America,Materials,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [43]:
# Break each name into a list of whitespace-separated words.
company_names = df_strings["Name"]
company_names.str.split()

0                   [3M, Company]
1             [A.O., Smith, Corp]
2          [Abbott, Laboratories]
3                  [AbbVie, Inc.]
4                [Accenture, plc]
                  ...            
500                 [Xylem, Inc.]
501           [Yum!, Brands, Inc]
502    [Zimmer, Biomet, Holdings]
503              [Zions, Bancorp]
504                      [Zoetis]
Name: Name, Length: 505, dtype: object

In [45]:
# Rows whose name consists of more than four whitespace-separated words.
word_counts = df_strings["Name"].str.split().str.len()
df_strings[word_counts > 4]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
15,APD,Air Products & Chemicals Inc,Materials,http://www.sec.gov/cgi-bin/browse-edgar?action...
19,ARE,Alexandria Real Estate Equities Inc,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...
37,AWK,American Water Works Company Inc,Utilities,http://www.sec.gov/cgi-bin/browse-edgar?action...
56,AJG,Arthur J. Gallagher & Co.,Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...
64,BHGE,"Baker Hughes, a GE Company",Energy,http://www.sec.gov/cgi-bin/browse-edgar?action...
202,FBHS,Fortune Brands Home & Security,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
258,JBHT,J. B. Hunt Transport Services,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
277,LH,Laboratory Corp. of America Holding,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
436,BK,The Bank of New York Mellon Corp.,Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...
452,FOXA,Twenty-First Century Fox Class A,Consumer Discretionary,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [47]:
# Swap every space in the name for a colon. regex=False forces a literal
# substring replacement, so the result does not depend on the installed
# pandas version's default for `regex` (older releases defaulted to
# regex=True and warned on single-character patterns).
df_strings["Name"].str.replace(" ", ":", regex=False)

0                  3M:Company
1             A.O.:Smith:Corp
2         Abbott:Laboratories
3                 AbbVie:Inc.
4               Accenture:plc
                ...          
500                Xylem:Inc.
501           Yum!:Brands:Inc
502    Zimmer:Biomet:Holdings
503             Zions:Bancorp
504                    Zoetis
Name: Name, Length: 505, dtype: object

In [51]:
# Companies whose name begins with a capital "A".
starts_with_a = df_strings["Name"].str.startswith("A")
df_strings[starts_with_a]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
1,AOS,A.O. Smith Corp,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
2,ABT,Abbott Laboratories,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
3,ABBV,AbbVie Inc.,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
4,ACN,Accenture plc,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
5,ATVI,Activision Blizzard,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
...,...,...,...,...
59,ADSK,Autodesk Inc,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
60,ADP,Automatic Data Processing,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
61,AZO,AutoZone Inc,Consumer Discretionary,http://www.sec.gov/cgi-bin/browse-edgar?action...
62,AVB,"AvalonBay Communities, Inc.",Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [57]:
# Companies whose name ends with a lower-case "n".
ends_with_n = df_strings["Name"].str.endswith("n")
df_strings[ends_with_n]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
22,ALLE,Allegion,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
49,APA,Apache Corporation,Energy,http://www.sec.gov/cgi-bin/browse-edgar?action...
68,BBT,BB&T Corporation,Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...
69,BDX,Becton Dickinson,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
97,CNC,Centene Corporation,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
102,SCHW,Charles Schwab Corporation,Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...
112,CTAS,Cintas Corporation,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
127,ED,Consolidated Edison,Utilities,http://www.sec.gov/cgi-bin/browse-edgar?action...
137,DHI,D. R. Horton,Consumer Discretionary,http://www.sec.gov/cgi-bin/browse-edgar?action...
162,ETN,Eaton Corporation,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [59]:
# Combine both conditions: the name starts with "A" and ends with "n".
company_names = df_strings["Name"]
starts_with_a = company_names.str.startswith("A")
ends_with_n = company_names.str.endswith("n")
df_strings[starts_with_a & ends_with_n]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
22,ALLE,Allegion,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
49,APA,Apache Corporation,Energy,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [61]:
# Names made up solely of digit characters (the displayed result is empty).
digits_only = df_strings["Name"].str.isdigit()
df_strings[digits_only]

Unnamed: 0,Symbol,Name,Sector,SEC Filings


In [63]:
# Names containing only letters and digits; any name with a space or
# punctuation is filtered out.
alnum_only = df_strings["Name"].str.isalnum()
df_strings[alnum_only]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
22,ALLE,Allegion,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
45,ANDV,Andeavor,Energy,http://www.sec.gov/cgi-bin/browse-edgar?action...
46,ANSS,ANSYS,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
73,BLK,BlackRock,Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...
76,BWA,BorgWarner,Consumer Discretionary,http://www.sec.gov/cgi-bin/browse-edgar?action...
81,AVGO,Broadcom,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
100,CERN,Cerner,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
126,COP,ConocoPhillips,Energy,http://www.sec.gov/cgi-bin/browse-edgar?action...
154,DWDP,DowDuPont,Materials,http://www.sec.gov/cgi-bin/browse-edgar?action...
174,EQIX,Equinix,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [65]:
# Names containing only alphabetic characters.
letters_only = df_strings["Name"].str.isalpha()
df_strings[letters_only]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
22,ALLE,Allegion,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...
45,ANDV,Andeavor,Energy,http://www.sec.gov/cgi-bin/browse-edgar?action...
46,ANSS,ANSYS,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
73,BLK,BlackRock,Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...
76,BWA,BorgWarner,Consumer Discretionary,http://www.sec.gov/cgi-bin/browse-edgar?action...
81,AVGO,Broadcom,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...
100,CERN,Cerner,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
126,COP,ConocoPhillips,Energy,http://www.sec.gov/cgi-bin/browse-edgar?action...
154,DWDP,DowDuPont,Materials,http://www.sec.gov/cgi-bin/browse-edgar?action...
174,EQIX,Equinix,Real Estate,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [71]:
# Case-insensitive search for "john" anywhere in the company name.
has_john = df_strings["Name"].str.contains("john", case=False)
df_strings[has_john]

Unnamed: 0,Symbol,Name,Sector,SEC Filings
261,JNJ,Johnson & Johnson,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...
262,JCI,Johnson Controls International,Industrials,http://www.sec.gov/cgi-bin/browse-edgar?action...


In [111]:
# Derive the final space-separated token of each name as "Last Name".
# .assign() builds a new frame rather than writing a column into
# df_strings in place, which avoids SettingWithCopyWarning when df_strings
# is itself a selection taken from another frame.
last_names = df_strings["Name"].str.split(" ").str[-1]
df_strings = df_strings.assign(**{"Last Name": last_names})

# Keep rows whose last token starts with "A" and ends with "a".
starts_with_a = df_strings["Last Name"].str.startswith("A")
ends_with_a = df_strings["Last Name"].str.endswith("a")
df_strings[starts_with_a & ends_with_a]

Unnamed: 0,Symbol,Name,Sector,SEC Filings,Last Name
351,PKG,Packaging Corporation of America,Materials,http://www.sec.gov/cgi-bin/browse-edgar?action...,America


In [113]:
# Last tokens that begin and end with a vowel (either case) and have at
# least one character in between.
vowel_bounded = df_strings["Last Name"].str.contains('^[aeiouAEIOU].+[aeiouAEIOU]$')
df_strings[vowel_bounded]

Unnamed: 0,Symbol,Name,Sector,SEC Filings,Last Name
160,ETFC,E*Trade,Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...,E*Trade
230,HPE,Hewlett Packard Enterprise,Information Technology,http://www.sec.gov/cgi-bin/browse-edgar?action...,Enterprise
245,INCY,Incyte,Health Care,http://www.sec.gov/cgi-bin/browse-edgar?action...,Incyte
248,ICE,Intercontinental Exchange,Financials,http://www.sec.gov/cgi-bin/browse-edgar?action...,Exchange
345,ORLY,O'Reilly Automotive,Consumer Discretionary,http://www.sec.gov/cgi-bin/browse-edgar?action...,Automotive
351,PKG,Packaging Corporation of America,Materials,http://www.sec.gov/cgi-bin/browse-edgar?action...,America
481,WBA,Walgreens Boots Alliance,Consumer Staples,http://www.sec.gov/cgi-bin/browse-edgar?action...,Alliance


In [119]:
# Every second character taken from the first ten characters of each name.
df_strings["Name"].str.slice(start=0, stop=10, step=2)

0      3 opn
1      AO mt
2      Abt a
3      Abi n
4      Acnue
       ...  
500    XlmIc
501    Ym rn
502    Zme i
503    ZosBn
504      Zei
Name: Name, Length: 505, dtype: object

In [121]:
# Reverse each name by slicing with a step of -1.
df_strings["Name"].str.slice(step=-1)

0                  ynapmoC M3
1             proC htimS .O.A
2         seirotarobaL ttobbA
3                 .cnI eiVbbA
4               clp erutneccA
                ...          
500                .cnI melyX
501           cnI sdnarB !muY
502    sgnidloH temoiB remmiZ
503             procnaB snoiZ
504                    siteoZ
Name: Name, Length: 505, dtype: object