In [4]:
!pip install pandas numpy scikit-learn transformers torch requests beautifulsoup4

import pandas as pd
import numpy as np
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier

class MemecoinDetector:
    def __init__(self):
        self.sentiment_analyzer = pipeline("sentiment-analysis")
        self.vectorizer = TfidfVectorizer()
        self.classifier = RandomForestClassifier()

    def get_coin_data(self, symbol):
        # Simulated API call to get token data
        try:
            url = f"https://api.coingecko.com/api/v3/coins/{symbol}"
            response = requests.get(url)
            data = response.json()
            return {
                'name': data.get('name', ''),
                'description': data.get('description', {}).get('en', ''),
                'market_cap': data.get('market_data', {}).get('market_cap', {}).get('usd', 0),
                'volume': data.get('market_data', {}).get('total_volume', {}).get('usd', 0),
                'website': data.get('links', {}).get('homepage', [''])[0],
            }
        except:
            return None

    def extract_features(self, coin_data):
        if not coin_data:
            return None

        features = []

        # Market metrics
        features.append(coin_data['market_cap'] / max(coin_data['volume'], 1))

        # Sentiment analysis
        sentiment = self.sentiment_analyzer(coin_data['description'])[0]
        features.append(1 if sentiment['label'] == 'POSITIVE' else 0)

        # Text features
        meme_keywords = ['dog', 'cat', 'moon', 'rocket', 'elon', 'safe', 'shib', 'inu']
        text = (coin_data['name'] + ' ' + coin_data['description']).lower()
        features.extend([1 if keyword in text else 0 for keyword in meme_keywords])

        return np.array(features)

    def predict(self, symbol):
        coin_data = self.get_coin_data(symbol)
        if not coin_data:
            return "Could not fetch coin data"

        features = self.extract_features(coin_data)
        if features is None:
            return "Could not analyze features"

        # Feature analysis
        risk_score = sum(features[2:]) / len(features[2:])  # Meme keyword ratio
        market_health = features[0]  # Market cap to volume ratio
        sentiment = features[1]  # Sentiment score

        # Risk assessment
        if risk_score > 0.5 and market_health < 1:
            confidence = "High"
        elif risk_score > 0.3 or market_health < 2:
            confidence = "Medium"
        else:
            confidence = "Low"

        return {
            "symbol": symbol,
            "memecoin_probability": risk_score,
            "market_health": market_health,
            "sentiment": "Positive" if sentiment else "Negative",
            "confidence": confidence,
            "is_likely_memecoin": risk_score > 0.3
        }

# Example usage
detector = MemecoinDetector()
sample_coins = ["bitcoin", "dogecoin", "shiba-inu"]
for coin in sample_coins:
    result = detector.predict(coin)
    print(f"\nAnalysis for {coin}:")
    print(result)



No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (610 > 512). Running this sequence through the model will result in indexing errors


RuntimeError: The size of tensor a (610) must match the size of tensor b (512) at non-singleton dimension 1

In [5]:
!pip install pandas numpy scikit-learn transformers torch requests beautifulsoup4

import pandas as pd
import numpy as np
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier

class MemecoinDetector:
    def __init__(self):
        self.sentiment_analyzer = pipeline("sentiment-analysis", truncation=True, max_length=512)
        self.vectorizer = TfidfVectorizer()
        self.classifier = RandomForestClassifier()

    def get_coin_data(self, symbol):
        try:
            url = f"https://api.coingecko.com/api/v3/coins/{symbol}"
            response = requests.get(url)
            data = response.json()
            return {
                'name': data.get('name', ''),
                'description': data.get('description', {}).get('en', '')[:500],  # Truncate description
                'market_cap': data.get('market_data', {}).get('market_cap', {}).get('usd', 0),
                'volume': data.get('market_data', {}).get('total_volume', {}).get('usd', 0),
                'website': data.get('links', {}).get('homepage', [''])[0],
            }
        except:
            return None

    def extract_features(self, coin_data):
        if not coin_data:
            return None

        features = []

        # Market metrics
        features.append(coin_data['market_cap'] / max(coin_data['volume'], 1))

        # Sentiment analysis
        sentiment = self.sentiment_analyzer(coin_data['description'])[0]
        features.append(1 if sentiment['label'] == 'POSITIVE' else 0)

        # Text features
        meme_keywords = ['dog', 'cat', 'moon', 'rocket', 'elon', 'safe', 'shib', 'inu']
        text = (coin_data['name'] + ' ' + coin_data['description']).lower()
        features.extend([1 if keyword in text else 0 for keyword in meme_keywords])

        return np.array(features)

    def predict(self, symbol):
        coin_data = self.get_coin_data(symbol)
        if not coin_data:
            return "Could not fetch coin data"

        features = self.extract_features(coin_data)
        if features is None:
            return "Could not analyze features"

        risk_score = sum(features[2:]) / len(features[2:])
        market_health = features[0]
        sentiment = features[1]

        if risk_score > 0.5 and market_health < 1:
            confidence = "High"
        elif risk_score > 0.3 or market_health < 2:
            confidence = "Medium"
        else:
            confidence = "Low"

        return {
            "symbol": symbol,
            "memecoin_probability": float(f"{risk_score:.2f}"),
            "market_health": float(f"{market_health:.2f}"),
            "sentiment": "Positive" if sentiment else "Negative",
            "confidence": confidence,
            "is_likely_memecoin": risk_score > 0.3
        }

# Example usage
detector = MemecoinDetector()
sample_coins = ["bitcoin", "dogecoin", "shiba-inu"]
for coin in sample_coins:
    result = detector.predict(coin)
    print(f"\nAnalysis for {coin}:")
    print(result)



No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu



Analysis for bitcoin:
{'symbol': 'bitcoin', 'memecoin_probability': 0.0, 'market_health': 32.52, 'sentiment': 'Negative', 'confidence': 'Low', 'is_likely_memecoin': False}

Analysis for dogecoin:
{'symbol': 'dogecoin', 'memecoin_probability': 0.38, 'market_health': 17.3, 'sentiment': 'Positive', 'confidence': 'Medium', 'is_likely_memecoin': True}

Analysis for shiba-inu:
{'symbol': 'shiba-inu', 'memecoin_probability': 0.38, 'market_health': 28.29, 'sentiment': 'Negative', 'confidence': 'Medium', 'is_likely_memecoin': True}
