In [5]:
from dotenv import load_dotenv
import os

load_dotenv("./env.local")

from network_analyzer_oo import *

analyzer = NetworkAnalyzer(period=5, half_life=30)

# 10 months of data for initial training
# 2 months of data for validation accuracy
train_start_time = int(datetime(2025, 11, 1).timestamp())
train_end_time = int(datetime(2026, 1, 31).timestamp())
val_start_time = int(datetime(2026, 1, 24).timestamp())
val_end_time = int(datetime(2026, 1, 31).timestamp())


In [6]:
ValDataProcesser = DataProcessor(period=5)
val_dataframe = ValDataProcesser.data_from_api(
    start_time=val_start_time,
    end_time=val_end_time,
    resolution=180
)

print(val_dataframe.head())

            timestamp                                      user_id  \
0 2026-01-24 00:00:00  device-b875a75b-fb3f-49f3-a2f8-1f5e2ea62580   
1 2026-01-24 00:03:00  device-b875a75b-fb3f-49f3-a2f8-1f5e2ea62580   
2 2026-01-24 00:06:00  device-b875a75b-fb3f-49f3-a2f8-1f5e2ea62580   
3 2026-01-24 00:09:00  device-b875a75b-fb3f-49f3-a2f8-1f5e2ea62580   
4 2026-01-24 00:12:00  device-b875a75b-fb3f-49f3-a2f8-1f5e2ea62580   

   connection_state  hour  miniute  second  period  day_of_week        date  
0                 0     0        0       0       0            5  2026-01-24  
1                 0     0        3       0       0            5  2026-01-24  
2                 0     0        6       0       1            5  2026-01-24  
3                 0     0        9       0       1            5  2026-01-24  
4                 0     0       12       0       2            5  2026-01-24  


In [7]:
analyzer.train_recursive(
    start_time=train_start_time, end_time=train_end_time, resolution=180, interval_days=5
)

Past Data Analysis for Network Connection State
   Found 491 devices

1. Fetching metrics...
   Fetched 50 records
   Processed data shape: (120050, 9)
   Date range: 2025-11-01 00:00:00 to 2025-11-06 00:00:00

4. Computing probabilities P(state|user,period,weekday)...
Probabilities saved to 'probabilities.json' and counts saved to 'counts.json'

Training from 2025-11-06 00:00:00 to 2025-11-11 00:00:00
   Found 491 devices

1. Fetching metrics...
   Fetched 50 records
Probabilities saved to 'probabilities.json' and counts saved to 'counts.json'

Training from 2025-11-11 00:00:00 to 2025-11-16 00:00:00
   Found 491 devices

1. Fetching metrics...
   Fetched 50 records
Probabilities saved to 'probabilities.json' and counts saved to 'counts.json'

Training from 2025-11-16 00:00:00 to 2025-11-21 00:00:00
   Found 491 devices

1. Fetching metrics...
   Fetched 50 records
Probabilities saved to 'probabilities.json' and counts saved to 'counts.json'

Training from 2025-11-21 00:00:00 to 2025-

In [5]:
analyzer.load_probabilities()
probability=analyzer.analyzer.get_probabilities()
print(probability[('eud-feed2146-cdc8-4246-95a1-c4f4c18581ff', np.int32(0), np.int32(1))])

KeyError: ('eud-feed2146-cdc8-4246-95a1-c4f4c18581ff', np.int32(0), np.int32(1))

In [8]:
val_accuracy = analyzer.evaluate(val_dataframe)
print(f"\nValidation Accuracy: {val_accuracy:.2%}")


Validation Accuracy: 97.95%


In [10]:
class Classifier:
    def __init__(self):
        self._features={} #private attribute to store features
        self.classifications = {}

    @staticmethod
    def entropy(p):
        p = p + 1e-9
        return -np.sum(p * np.log(p))
    
    @staticmethod
    def build_user_tensor(prob_dict):
        """
        Returns:
        user_profiles[user] = array shape (7,96,3)
        """
        user_profiles = defaultdict(lambda: np.zeros((7,288,3)))
        for (user, block, day), probs in prob_dict.items():
            user_profiles[user][day, block, :] = [probs.get(1,0), probs.get(0,0), probs.get(2,0)]

        return user_profiles
    
    def extract_weekly_features(self,user,profile):
        """
        profile shape: (7,96,3)
        """
        feats = {}
        login = profile[:,:,0]   # p_logged_in
        idle  = profile[:,:,2]

        total_login_activity = np.sum(login)
        feats["total_login_activity"] = total_login_activity

        # If device never logs in, stop here
        if total_login_activity < 1e-3:
            feats["inactive"] = True
            self._features[user] = feats
            return feats

        feats["inactive"] = False
        # --- Temporal entropy per day ---
        day_entropies = [self.entropy(login[d]) for d in range(7)]

        feats["login_entropy_mean"] = np.mean(day_entropies)
        feats["login_entropy_var"]  = np.var(day_entropies)

        # --- Weekday consistency ---
        sims = []
        for d1 in range(7):
            for d2 in range(d1+1, 7):
                sims.append(1 - cosine(login[d1], login[d2]))

        feats["weekday_similarity"] = np.mean(sims)

        # --- Peak strength ---
        feats["login_peak_max"] = np.max(login)

        # --- Active days count ---
        feats["active_days"] = np.sum(np.max(login, axis=1) > 0.05)

        # --- Idle dominance ---
        feats["idle_mean"] = np.mean(idle)

        self._features[user] = feats

        return feats

    def classify(self, probabilities: Dict[Tuple, Dict[int, float]]) -> None:
        """
        On the basis of the past data and probabilities classify the users into different categories:
            1. Fixed Wireless Devices
            2. Shift Connected Devices
            3. Ad-hoc Connected Devices (Random Connections)
        """

        user_profiles = self.build_user_tensor(probabilities)
        for user, profile in user_profiles.items():
            feats = self.extract_weekly_features(user, profile)
            if not feats["inactive"] and feats["weekday_similarity"] > 0.85 and feats["login_entropy_mean"] < 2.5:
                classification = "Fixed Wireless Device"
            elif not feats["inactive"] and feats["weekday_similarity"] > 0.6 and feats["active_days"] >= 4:
                classification = "Shift Connected Device"
            else:
                classification = "Ad-hoc Connected Device"
            
            self.classifications[user] = classification

probability = analyzer.analyzer.get_probabilities()
classifier = Classifier()
classifier.classify(probability)
for user, classification in classifier.classifications.items():
    print(f"User: {user}, Classification: {classification}")
    

User: device-1327bd60-cf7a-4370-8744-a621a69b6679, Classification: Ad-hoc Connected Device
User: device-158d54a2-d9ad-4e65-b399-cd3352766cb4, Classification: Ad-hoc Connected Device
User: device-22d66dcc-dc8c-46d6-b0c1-e5dd29596d50, Classification: Ad-hoc Connected Device
User: device-42bca9cf-0d22-43d8-8f55-29e45fe96d64, Classification: Ad-hoc Connected Device
User: device-5939d0a9-498f-454c-98cf-c67b5dd574c2, Classification: Ad-hoc Connected Device
User: device-5f94c1cf-469c-41ec-8511-68f55911f11f, Classification: Ad-hoc Connected Device
User: device-74a0bc14-e313-4cb6-98ec-072cbefe84cf, Classification: Ad-hoc Connected Device
User: device-81ab19e8-fe81-458c-996f-66da9e8cea71, Classification: Ad-hoc Connected Device
User: device-99224ee7-72be-4003-823c-0c569f069028, Classification: Ad-hoc Connected Device
User: device-9c47bfe7-22db-43fa-b699-d12f4ea9dfb1, Classification: Ad-hoc Connected Device
User: device-a0ad2048-552f-4008-abc7-611a54e10c96, Classification: Ad-hoc Connected Device

In [None]:
# TODO: do the predictions on fixed wireless devices, and see if we can improve accuracy near to 100%.

# TODO: Try to generate a cosmatic data on your own. (sample data)

# TODO: add the holidays effect into the model.

# TODO: convert the probability to chart (like heatmap) for better visualization, where horizontal axis is time of day, vertical axis is day of week. connected and disconnected colors, consided idle as connected.

# TODO: resolution should be around 1 minute.

In [None]:
# Some devices started later in the january, so there predictions won't be good. Look to handel such cases.
