In [16]:
import os
import emoji
import pickle
import collections
import pandas as pd
from nltk.corpus import stopwords
from sklearn.linear_model import RidgeClassifier
from sklearn.feature_extraction.text import CountVectorizer


# Create Model and save assets

In [67]:
# Load the labelled corpus and keep only rows carrying one of the three sentiment classes.
# os.path.join keeps the relative path portable (the backslash form only resolves on Windows).
df = pd.read_excel(os.path.join('data-labeled', 'combined-super-clean-data.xlsx'))
df = df[df['label'].isin(['positive', 'negative', 'neutral'])]
df.columns = ['data', 'label']

# CountVectorizer documents stop_words as 'english', a list, or None; recent scikit-learn
# releases validate the argument type and reject a set, so pass a sorted, de-duplicated list.
stop_words = sorted(set(stopwords.words('english')))
count_vector_one_gram = CountVectorizer(stop_words=stop_words, strip_accents='unicode', ngram_range=(1, 1))

# Learn the unigram vocabulary and build the document-term matrix in a single pass.
X_count_vectorizer_one_gram = count_vector_one_gram.fit_transform(df['data'].astype('str'))

# Fit the classifier on the bag-of-words counts; random_state pins any solver randomness.
clf = RidgeClassifier(random_state=0)
clf.fit(X_count_vectorizer_one_gram, df['label'])

RidgeClassifier(random_state=0)

In [68]:
# Ensure the output folder exists, then pickle the fitted vectorizer and classifier
# so the prediction section can reload them without retraining.
os.makedirs('model-assets', exist_ok=True)

for asset_path, asset in (
    (r'model-assets/feature.pkl', count_vector_one_gram),
    (r'model-assets/model.pkl', clf),
):
    with open(asset_path, 'wb') as handle:
        pickle.dump(asset, handle)

# Use model for Prediction

In [5]:
def clean_data(user_data):
    """Normalise raw text messages before they are vectorised.

    Steps, in order: flatten newlines, drop ``@mentions``, collapse spaces,
    drop tokens of 15+ characters, strip markup punctuation, collapse runs of
    dots, and replace URLs, dollar amounts and percentages with placeholder
    tokens; finally emoji are converted to text names.

    Args:
        user_data: Iterable of raw messages; each value is passed through ``str()``.

    Returns:
        pandas.Series named ``'clean'`` with one cleaned string per input item.

    NOTE(review): tokens of 15+ characters (which includes most full URLs) are
    dropped, and ``:``/``-`` are stripped, before the URL pattern runs, so only
    short URLs end up as ``<<<URL>>>``. The order is kept as-is because the
    training data was presumably cleaned the same way - confirm before changing.
    """
    df = pd.DataFrame(data=user_data, columns=['data'])

    # Coerce every value to str once; all later steps operate on plain strings.
    text = df['data'].map(str).str.replace('\n', ' ', regex=False)
    # Raw string: '\w' in a normal literal is an invalid escape sequence.
    text = text.str.replace(r'(@\w+)', '', regex=True)
    text = text.str.replace(r' +', ' ', regex=True)

    # Continue from the already-cleaned text. Restarting from the raw column here
    # would throw away the mention removal performed above.
    text = text.apply(lambda msg: ' '.join(tok for tok in msg.split() if len(tok) < 15))
    text = text.str.replace(r"""[#*+\\\(\)_!:\-\[\]<>']""", '', regex=True)
    text = text.str.replace(r"""(\.{2,})""", '.', regex=True)
    text = text.str.replace(r"""[-a-zA-Z0-9@:%_\+.~#?&//=]{2,256}\.[a-z]{2,4}\b(\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?""", '<<<URL>>>', regex=True)
    text = text.str.replace(r""" +""", ' ', regex=True)
    text = text.str.replace(r"""\$(\d+\.?,?)+""", '<<<AMOUNT>>>', regex=True)
    text = text.str.replace(r"""(\d+%)""", '<<<PERCENTAGE>>>', regex=True)
    # Emoji become "_name" tokens (leading underscore, no closing delimiter).
    text = text.apply(lambda msg: emoji.demojize(msg, delimiters=("_", "")))

    return text.rename('clean')

In [6]:
def load_assets(feature_pkl_path, model_pkl_path):
    """Unpickle the saved vectorizer and model.

    Args:
        feature_pkl_path: Path to the pickled vectorizer file.
        model_pkl_path: Path to the pickled model file.

    Returns:
        Tuple ``(vectorizer, model)`` in that order.

    Note: ``pickle.load`` can execute arbitrary code, so only point this at
    files you produced yourself.
    """
    loaded = []
    # The vectorizer is read first, then the model, each from its own file handle.
    for pkl_path in (feature_pkl_path, model_pkl_path):
        with open(pkl_path, 'rb') as handle:
            loaded.append(pickle.load(handle))

    return tuple(loaded)

In [7]:
def generate_featue(vectorizer, df):
    """Return ``vectorizer.transform(df)``.

    Args:
        vectorizer: Object exposing a ``transform`` method (here, the fitted
            vectorizer loaded from disk).
        df: The documents to transform; in this notebook, the cleaned text Series.

    Returns:
        Whatever ``vectorizer.transform`` produces for ``df``.
    """
    return vectorizer.transform(df)

# USAGE

In [1]:
raw_data = '''**âš¡ï¸Craft Brewing Business Features Launch of 1st Craft Beer NFT on Blingâš¡ï¸

Trace **doing good work!!

https://twitter.com/7bridgesbrewing/status/1445638656966881280?s=21

Read more: https://bit.ly/TraceNFTBeer-CraftBrew"
"âš¡ï¸**#CyberTrade is well on its way to become most anticipated Blockchain game of the year! 

https://twitter.com/CyberTradeGame/status/1445765127190368259?s=20 
**
**27 000 Pre-Registered users in 24 hours! 
**
**Go to**ðŸ‘‰**[PRE-REGISTER](https://cybertrade.game/whitelist) and join the New era of #NFTgaming!**"
"NFT farming on kawaii islands marketplace!!

https://twitter.com/kawaii_islands/status/1445797046921965568"
"Wallfair #whitelist is now open 

https://twitter.com/joinwallfair/status/1445815427549081602"
"Get ready to watch movies with Crypto ðŸŽ¬ 

https://twitter.com/CEOAdam/status/1445538907324497920"
JUST IN: The Mexican Stock Exchange (BMV) is now considering listing crypto futures on its derivatives exchange.
"**Just another day and another Bitcoin pump** âš¡ï¸

https://twitter.com/BitcoinMagazine/status/1445740489114669063"
"**Uptober is real!!!
**
https://twitter.com/CryptoDaku_/status/1445669281740439554"
"ðŸ¤**Amasa x IoTex**ðŸ¤

https://twitter.com/Amasa_io/status/1445751950411173888"
"Time for Next AMA: We have AMA with The Panther Protocol team

Panther is a End-to-end privacy protocol for digital assets.

https://www.pantherprotocol.io/
https://t.me/pantherprotocol

Reward: as usual :) we always have rewards :)
Countdown: https://tinyurl.com/c6wsnhxn
Venue: https://t.me/cryptodakurobinhooders"
"Time for Next AMA: We have AMA with Demole team

Demole is the first playable 3D RPG game in the NFT ecosystem consisting of multiple earning and gameplay mechanics.

https://demole.io/
https://t.me/demole_ioCommunity

Reward: as usual :) we always have rewards :)
Countdown: https://tinyurl.com/9vhptb2b 
Venue: https://t.me/cryptodakurobinhooders"
"**Panther announces partnership with Bumper!** ðŸ”¥ðŸ”¥

Another important partnership announcement! We have entered into a partnership agreement that aims to benefit retail and institutional users alike and the benefits to both parties are manyfold.

Together, Panther and Bumper aim to provide DeFi users with a seamless, safe, and private DeFi user experience. We will also create workflows that allow institutions and fintechs (and their users) to enter DeFi and enjoy the Bumper Protocolâ€™s volatility protection systems, all the while staying compliant with world-class KYC and AML standards.

https://twitter.com/ZKPanther/status/1445797651966881803"
"**Demole x GateLabs**ðŸ”¥

https://twitter.com/demoleio/status/1445721329013977099"
"https://bit.ly/3oDcm47

Netvrk spacesip partnership"
"ðŸ’«ðŸ’« **RENA, the governance token for the worldâ€™s first highly-personalized, NFT metaverse game Warena is now available on the CoinMarketCap** ðŸ’«ðŸ’«

â€œObviously itâ€™s an important step for us to take,â€ said Minh Doan, co-founder of Warena, â€œCoinMarketCap is the world's most-referenced price-tracking website for crypto assets. They empower retail users to make informed, detailed conclusions. In the end, being a listed token builds upon the already-sound reputation of Warena.â€

**RENA on CoinMarketCap
**https://coinmarketcap.com/currencies/warena/

**READ MORE **ðŸ‘‡ðŸ‘‡
https://teamwarena.medium.com/rena-now-listed-on-coinmarketcap-c588153e3a7

**LIKE AND SHARE** ðŸ‘
https://twitter.com/WarenaOfficial/status/1445730053019484172?s=20"
"**THE APES ARE COMING THIS OCTOBER!! **ðŸ˜±

Mines of Dalarnia â€œMining Apesâ€ NFT Collection is coming this October 14th. So stay tuned, Miners! 

The final 10,000 mining apes are selected randomly, and although everyone is unique, some combinations will be âœ¨rarerâœ¨ than others. 

Mining apes are different from each other through physical attributes, accessories, and equipments. ðŸ’

Soooâ€¦. Which mining ape do you think you'll get? ðŸ™Š"
"Geist Finance will launch on October 6th, 2021 at 10:00am UTC. GEIST emissions will begin one hour after the protocol launches, to ensure that all early users have an opportunity to participate.

https://t.me/geist_finance

https://medium.com/@geistfantom/launch-announcement-geist-finance-db160c97313e

- Move funds to FTM Chain or other bridges https://anyswap.exchange

RPC URL: https://rpc.ftm.tools/
Chain ID: 250
Currency Symbol: FTM
Block Explorer URL: https://ftmscan.com/

Ape as usual DYOR!! your money your responsibilty"
"**Realm Tearser App launch on android and ios 

**https://twitter.com/Enter_Realm/status/1445708153329315847"
"https://twitter.com/MRPHSupplyChain/status/1445491026299273218

MRPH!! rebrand and move up!!"
"We have got it covered for our developer community on all fronts with our [4 layer solution](https://twitter.com/merkleNetwork/status/1445686502076272641). ðŸ¦¾

ðŸ“œCheck out our whitepaper for detailed information - 
https://merkle.network/files/whitepaper_v1_0.pdf

#Interoperability #oracles #architecture $ETH $BNB $MATIC $DOT $AVAX $SOL $MERKLE

**ðŸ—£ Retweet**: https://twitter.com/merkleNetwork/status/1445686502076272641"
"â˜ ï¸**Latest Episode of the Rugpull Series is here**â˜ ï¸

The developer behind the** NFT project, 'Evil Ape,** suddenly **disappeared** along with its Twitter account, website, and **$2.7 million.** 

https://www.vice.com/en/article/y3dyem/investors-spent-millions-on-evolved-apes-nfts-then-they-got-scammed?utm_source=VICE_Twitter"

"**Dark Frontiers IGO on GameStarter in live!!!**ðŸš€

IGO ends on Oct 6th, 19:00 UTC

https://twitter.com/gamestarter_co/status/1445464584026951687?s=20"
"Crypto to become legal in Brazil ðŸ‡§ðŸ‡·??

Brazilâ€™s Congress will discuss a bill to regulate the companies operating in the cryptocurrency sector and increases penalties for crypto pyramid schemes and other illegal activities. 

**A big step towards mass adoption of crypto **âš¡ï¸"
"**Warena (RENA)** Quadruple IDO will be held on 

**ðŸ”¹DAO Maker** (7 Oct) 
**ðŸ”¹Red Kite** (7 Oct) 
**ðŸ”¹GameFi** (7 Oct) 
**ðŸ”¹PAID Ignition** (8 Oct)

ðŸš€Public Sale/IDO Price: $0.165
ðŸš€Private Sale Price: $0.165
ðŸš€Initial Marketcap: $420,000

https://twitter.com/WarenaOfficial/status/1445642759130402829?s=20"
"**HeroVerse Marketplace will open at 12:00 UTC 7th Oct 

**https://heroverse-game.medium.com/heroverse-sets-to-boost-its-marketplace-as-an-one-stop-shop-4b8644d8b4fb"
"Kevin Oâ€™Leary Now Has More Crypto Than Gold - What about you ðŸ‘€?

https://twitter.com/BTCTN/status/1445321275547824130"
"A nation state buying Bitcoin dips âœ¨ - We have come a long way now!!

https://twitter.com/BitcoinMagazine/status/1445429295627870215"
"""Insufficiently bullish on NFTs""""

https://twitter.com/punk6529/status/1443921334837338114"
"**JUST IN: A**rab Bank Switzerland has announced that they have chosen to partner with **Tezos** to provide its clients with innovative, compliant on-chain digital financial products. 

**What does this mean?**

Arab Bank Switzerland will now provide institutional-grade storage, staking, and trading services for tez (XTZ)."
"It's been a wild ride for Bitcoin in 2021!! And it now beats stocks, commodities to become **best-performing asset of 2021**âš¡ï¸

https://twitter.com/Bitcoin/status/1445155518608142336"
"**Polytrade MVP to be Unveiled on 13-14th October!!!

**https://twitter.com/Polytrade_fin/status/1445383162385813517?s=20"
"âš¡ï¸**Pre-Register for alpha test of Revolutionary NFT game #CyberTrade is officialy open! âš¡ï¸

**Everybody who enter event will receive **â€œAlpha event gold chief skinâ€** for their #NFT fighter in **CyberTrade** on launch

https://twitter.com/CyberTradeGame/status/1445400497955954695?s=20"
"**Nasdaq Down, S&P 500 Down, and Bitcoin UP **âš¡ï¸**
**
It's the first week of **Uptober** and Bitcoin and alts are showing resilience to other correlated markets.

But can they ""keep their head when all about them are losing theirs"" and truly become an independent market?

Complete Read hereðŸ‘‡

https://b21.ghost.io/b21-ddd-oct-04-2021"
"**MyMasterWar Testnet will be live on Oct 8 **ðŸ’¥

https://mymasterwar.medium.com/invitation-to-join-the-gamified-mymasterwar-testnet-become-the-lucky-mat-warriors-5d94fac4c167

**Apply Now to Get Testnet Access:** https://forms.gle/uF28saDn2TVxgsn98"
"**ðŸš€Demole X Venture Capitals**ðŸš€

https://twitter.com/demoleio/status/1445393068685991950?s=20"
"**THE APES ARE HERE!! **ðŸ¦

#nftsale

Learn more about this collection by reading through our blog below. ðŸ‘‡
https://minesofdalarnia.medium.com/mines-of-dalarnia-mining-apes-nft-collection-9aaee00c2fe6"
"**The race to cater to institutional investors who want to wager on cryptocurrency is heating up **ðŸ”¥

Today U.S. Bank - 5th biggest retail bank in the nation, announced today that its cryptocurrency custody services are now live and available to their Global Fund Services clients.

According to **Gunjan Kedia, vice chair, U.S. Bank,** ""Investor interest in cryptocurrency and demand from our fund services clients have grown strongly over the last few years and we made it a priority to accelerate our ability to offer custody services.â€"
"MRFI listing on Mexc!!

repeat of avxl performancE? lets see!!"
'''.split('\n')

In [2]:
# Keep only lines longer than 5 characters (drops blank lines and tiny fragments).
raw_data = [line for line in raw_data if len(line) > 5]

In [8]:
# Load the artefacts from the same relative 'model-assets' folder that the
# "save assets" cell writes to, instead of a machine-specific absolute path.
vectorizer, model = load_assets(
    feature_pkl_path=os.path.join('model-assets', 'feature.pkl'),
    model_pkl_path=os.path.join('model-assets', 'model.pkl'),
)

In [9]:
# Clean the raw lines; despite the name, `df` is the Series returned by clean_data.
df = clean_data(raw_data)

In [10]:
# Transform the cleaned text with the loaded vectorizer.
X = generate_featue(vectorizer, df)

In [12]:
# Predict with the loaded model on the transformed features.
predictions = model.predict(X)

In [15]:
# Total number of predictions; used as the denominator of the sentiment score.
sample_count = len(predictions)

In [20]:
# Tally how many times each predicted label occurs.
count_of_each_predictions = collections.Counter(predictions)

In [21]:
# Display the number of 'positive' predictions (a Counter yields 0 for a missing label).
count_of_each_predictions['positive']

27

In [23]:
# Sentiment score = fraction of all predictions that are 'positive'.
# NOTE(review): raises ZeroDivisionError if there are no predictions (sample_count == 0).
sentiment_score = count_of_each_predictions['positive'] / sample_count

In [24]:
# Display the computed score as the cell output.
sentiment_score

0.20930232558139536