In [4]:
import pandas as pd
from numpy.random import randn
from numpy.random import seed
from scipy.stats import pearsonr

import re                      # tool to perform pattern matching, used for data cleaning
from textblob import TextBlob  # Sentiment Analysis tool


In [1]:
def get_norm_stock(comp):
    
    # Read the stock data downloaded from Yahoo Finance.
    stock_df = pd.read_csv(str(comp)+".csv")
    stock_df.head()

    # Stock data pre-processing
    stock_df["Indicator(%)"] = (stock_df["Close"] - stock_df["Open"]) * 100 / stock_df["Open"]
    stock_df["Label"] = stock_df["Indicator(%)"].apply(lambda x: 1 if x > 0 else 0)
    stock_df.drop(columns= ["High","Low","Adj Close","Volume"], axis = 0).head(20)

    # Stock Data Analysis
    cleaned_stock_df = stock_df[["Date","Indicator(%)"]]
    cleaned_stock_df = cleaned_stock_df.set_index("Date")
    norm_STOCK_df=(cleaned_stock_df-cleaned_stock_df.mean())/cleaned_stock_df.std()
    
    # Save the stock analysis as csv
    norm_STOCK_df.to_csv(comp+"_STOCK.csv")

In [12]:
def get_norm_senti(comp):
    
    # Read the data scraped
    df = pd.read_csv(comp+"_tweets.csv")

    col = ["date","tweet","username"]
    newdf = df[col]
    newdf['date'] = newdf['date'].apply(lambda x : x[:10])
    newdf.set_index("date", inplace =  True)
    
    # Twitter data pre-processing
    symbol = comp
    clean = newdf["tweet"].str.lower()
    clean = clean.apply(lambda x :re.sub('@[a-z]*','',x))      # Remove tags
    clean = clean.apply(lambda x :re.sub('#[a-z0-9]*','',x))   # Remove hash tags
    clean = clean.apply(lambda x :re.sub('[0-9]+[a-z]*',' ',x)) # Remove numnbers and associated text. Like : 1st, 2nd, nth....
    clean = clean.apply(lambda x :re.sub('\n','',x))            # Remove \n\t
    clean = clean.apply(lambda x :re.sub('https?:\/\/.*',' ',x))        # Remove URLs
    clean = clean.apply(lambda x :re.sub('[:;!-.,()%/?|]',' ',x))       # Remove Special characters
    clean = clean.apply(lambda x :re.sub('$[a-z]*',' ',x))                        # Remove tickers and strings have $abc pattern
    clean = clean.apply(lambda x : x.encode('ascii', 'ignore').decode('ascii'))   # Remove emojis
    clean = clean.apply(lambda x :re.sub('[0-9]{4}-[0-9]{2}-[0-9]{2}','',x))      # Remove date
    clean = clean.apply(lambda x :re.sub('[0-9]*','',x))

    newdf["tweet"] = clean
    newdf = newdf.drop("username", axis = 1)

    # Twitter data Sentiment Analysis
    testimonial = TextBlob(clean[50])
    testimonial.sentiment

    newdf["senti_polarity"] = newdf["tweet"].apply(lambda x: TextBlob(x).sentiment.polarity)
    newdf["senti_subjectivity"] = newdf["tweet"].apply(lambda x: TextBlob(x).sentiment.subjectivity)

#      # Save the Sentiment analysis as csv
    twitter_df = newdf.groupby("date").mean()
    twitter_df.to_csv(comp+"_SA_results.csv")


In [None]:
companies = {'WMT','XOM','BRK', 'AMZN', 'AAPL'}

for comp in companies:
    ''' 
        For each company perform 
        Sentiment Analysis and Stock Analysis
    '''
    get_norm_senti(comp)
    get_norm_stock(comp)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
