<a href="https://colab.research.google.com/github/Determine-warrior/AIVA/blob/main/uday.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas numpy matplotlib seaborn plotly scikit-learn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.impute import SimpleImputer
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots



In [2]:
class CMDBIntelligenceSystem:
    def __init__(self, data=None):
        """
        Initialize the CMDB Intelligence System with an optional existing dataset.

        Parameters:
        - data: Pandas DataFrame, existing dataset (default: None, will generate synthetic data)
        """
        self.data = data
        self.cleaned_data = None
        self.clusters = None
        self.anomalies = None
        self.health_scores = None
        self.risk_scores = None
        print(" CMDB Intelligence System initialized")


cmdb_system = CMDBIntelligenceSystem(data=None)

 CMDB Intelligence System initialized


In [3]:

def generate_cmdb_data(self, n_records=5000):
    """
    Generate realistic CMDB data for analysis if no dataset is provided.

    Parameters:
    - n_records: int, number of records to generate (default: 5000)

    Returns:
    - Pandas DataFrame with synthetic CMDB data
    """
    if self.data is not None:
        print("Existing dataset provided, skipping synthetic data generation")
        return self.data

    print("Generating Synthetic CMDB Dataset...")
    np.random.seed(42)

    ci_types = ['Server', 'Database', 'Application', 'Network_Device', 'Storage', 'Service', 'Virtual_Machine']
    environments = ['Production', 'Staging', 'Development', 'Test', 'DR']
    statuses = ['Active', 'Inactive', 'Maintenance', 'Deprecated', 'Planned']
    criticality = ['Critical', 'High', 'Medium', 'Low']
    locations = ['DataCenter_A', 'DataCenter_B', 'Cloud_AWS', 'Cloud_Azure', 'OnPremise', 'Hybrid']

    data = []
    for i in range(n_records):
        ci_id = f"CI_{str(i+1).zfill(6)}"
        ci_type = np.random.choice(ci_types, p=[0.25, 0.15, 0.2, 0.15, 0.1, 0.1, 0.05])
        environment = np.random.choice(environments, p=[0.35, 0.2, 0.2, 0.15, 0.1])
        status = np.random.choice(statuses, p=[0.65, 0.1, 0.1, 0.1, 0.05])
        business_criticality = np.random.choice(criticality, p=[0.15, 0.3, 0.4, 0.15])
        location = np.random.choice(locations)


        created_date = datetime.now() - timedelta(days=np.random.randint(1, 1460))  # 0-4 years
        last_updated = created_date + timedelta(days=np.random.randint(0, 730))
        last_scanned = datetime.now() - timedelta(days=np.random.randint(0, 120))

        if ci_type in ['Server', 'Virtual_Machine']:
            cpu_utilization = np.random.beta(2, 5) * 100  # Skewed distribution
            memory_utilization = np.random.beta(3, 4) * 100
            disk_utilization = np.random.beta(2, 6) * 100
            network_latency = np.random.exponential(10)
        elif ci_type == 'Database':
            cpu_utilization = np.random.beta(3, 5) * 100
            memory_utilization = np.random.beta(4, 3) * 100
            disk_utilization = np.random.beta(5, 3) * 100
            network_latency = np.random.exponential(5)
        else:
            cpu_utilization = np.random.beta(2, 7) * 100
            memory_utilization = np.random.beta(3, 6) * 100
            disk_utilization = np.random.beta(2, 8) * 100
            network_latency = np.random.exponential(15)

        # Clamp values
        cpu_utilization = max(0, min(100, cpu_utilization))
        memory_utilization = max(0, min(100, memory_utilization))
        disk_utilization = max(0, min(100, disk_utilization))
        network_latency = max(0, min(100, network_latency))

        # Incident and change data
        incident_count_30d = np.random.negative_binomial(2, 0.5 if business_criticality == 'Critical' else 0.7)
        change_count_30d = np.random.negative_binomial(3, 0.6 if environment == 'Production' else 0.4)

        # Age and staleness
        age_days = (datetime.now() - created_date).days
        days_since_update = (datetime.now() - last_updated).days
        days_since_scan = (datetime.now() - last_scanned).days

        # Compliance and documentation
        compliance_score = np.random.uniform(0.5, 1.0) if status == 'Active' else np.random.uniform(0.2, 0.8)
        documentation_completeness = np.random.uniform(0.3, 1.0)

        # Owner and dependencies
        has_owner = np.random.choice([True, False], p=[0.85, 0.15])
        owner = f"Team_{np.random.randint(1, 25)}" if has_owner else None
        dependency_count = np.random.negative_binomial(5, 0.5 if ci_type in ['Application', 'Service'] else 0.7)

        # Missing values
        if np.random.random() < 0.1:
            cpu_utilization = None
        if np.random.random() < 0.08:
            memory_utilization = None
        if np.random.random() < 0.12:
            documentation_completeness = None

        data.append({
            'CI_ID': ci_id,
            'CI_Type': ci_type,
            'Environment': environment,
            'Status': status,
            'Business_Criticality': business_criticality,
            'Location': location,
            'Created_Date': created_date,
            'Last_Updated': last_updated,
            'Last_Scanned': last_scanned,
            'CPU_Utilization': cpu_utilization,
            'Memory_Utilization': memory_utilization,
            'Disk_Utilization': disk_utilization,
            'Network_Latency': network_latency,
            'Incident_Count_30d': incident_count_30d,
            'Change_Count_30d': change_count_30d,
            'Age_Days': age_days,
            'Days_Since_Update': days_since_update,
            'Days_Since_Scan': days_since_scan,
            'Compliance_Score': compliance_score,
            'Documentation_Completeness': documentation_completeness,
            'Owner': owner,
            'Dependency_Count': dependency_count
        })

    self.data = pd.DataFrame(data)
    print(f" Generated {len(self.data)} CMDB records")
    return self.data


CMDBIntelligenceSystem.generate_cmdb_data = generate_cmdb_data


if cmdb_system.data is None:
    cmdb_system.generate_cmdb_data(n_records=5000)

cmdb_system.data.to_excel('cmdb_data.xlsx', index=False, engine='openpyxl')

Generating Synthetic CMDB Dataset...
 Generated 5000 CMDB records


In [4]:
# Cell 3: Data Profiling
def profile_data(self):
    """
    Comprehensive data profiling and quality assessment with interactive visualizations.

    Returns:
    - quality_report: DataFrame with missing data statistics
    """
    print("\n CMDB Data Profiling Report")
    print("=" * 50)

    # Basic statistics
    print(f"Total Records: {len(self.data):,}")
    print(f"Total Columns: {len(self.data.columns)}")
    print(f"Memory Usage: {self.data.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

    # Missing data analysis
    missing_data = self.data.isnull().sum()
    missing_percent = (missing_data / len(self.data)) * 100
    quality_report = pd.DataFrame({
        'Missing_Count': missing_data,
        'Missing_Percentage': missing_percent
    }).sort_values('Missing_Percentage', ascending=False)

    print("\n Data Quality Assessment:")
    print(quality_report[quality_report['Missing_Count'] > 0])

    # Interactive missing data visualization
    fig = px.bar(quality_report[quality_report['Missing_Count'] > 0],
                 x=quality_report[quality_report['Missing_Count'] > 0].index,
                 y='Missing_Percentage',
                 title='Missing Data Percentage by Column',
                 labels={'Missing_Percentage': 'Missing Data (%)', 'index': 'Column'},
                 color='Missing_Percentage',
                 color_continuous_scale='Reds')
    fig.update_layout(showlegend=False)
    fig.show()

    # Categorical distributions
    categorical_cols = ['CI_Type', 'Environment', 'Status', 'Business_Criticality', 'Location']
    available_cols = [col for col in categorical_cols if col in self.data.columns]
    print("\n Categorical Distributions:")
    for col in available_cols:
        print(f"\n{col}:")
        print(self.data[col].value_counts().head())

        # Interactive categorical distribution
        fig = px.histogram(self.data, x=col, title=f'Distribution of {col}',
                          color=col, color_discrete_sequence=px.colors.qualitative.Plotly)
        fig.update_layout(showlegend=False)
        fig.show()

    # Numerical statistics
    numerical_cols = ['CPU_Utilization', 'Memory_Utilization', 'Disk_Utilization',
                     'Incident_Count_30d', 'Change_Count_30d', 'Network_Latency']
    available_num_cols = [col for col in numerical_cols if col in self.data.columns]
    print("\n Numerical Metrics Summary:")
    print(self.data[available_num_cols].describe())

    # Interactive numerical distribution
    for col in available_num_cols:
        fig = px.histogram(self.data, x=col, title=f'Distribution of {col}',
                          nbins=30, color_discrete_sequence=['blue'])
        fig.show()

    return quality_report


CMDBIntelligenceSystem.profile_data = profile_data

quality_report = cmdb_system.profile_data()


 CMDB Data Profiling Report
Total Records: 5,000
Total Columns: 22
Memory Usage: 3.5 MB

 Data Quality Assessment:
                            Missing_Count  Missing_Percentage
Owner                                 762               15.24
Documentation_Completeness            675               13.50
CPU_Utilization                       522               10.44
Memory_Utilization                    407                8.14



 Categorical Distributions:

CI_Type:
CI_Type
Server            1222
Application        964
Database           799
Network_Device     703
Storage            542
Name: count, dtype: int64



Environment:
Environment
Production     1785
Development     994
Staging         976
Test            718
DR              527
Name: count, dtype: int64



Status:
Status
Active         3253
Deprecated      527
Inactive        492
Maintenance     471
Planned         257
Name: count, dtype: int64



Business_Criticality:
Business_Criticality
Medium      1985
High        1511
Low          753
Critical     751
Name: count, dtype: int64



Location:
Location
OnPremise       871
Cloud_Azure     859
DataCenter_A    825
Cloud_AWS       825
DataCenter_B    812
Name: count, dtype: int64



 Numerical Metrics Summary:
       CPU_Utilization  Memory_Utilization  Disk_Utilization  \
count      4478.000000         4593.000000       5000.000000   
mean         26.849781           39.668211         28.197721   
std          15.624963           17.981218         20.462893   
min           0.362601            0.669654          0.244550   
25%          14.635442           25.851862         12.858661   
50%          24.619506           38.082685         22.322626   
75%          36.442580           51.702212         38.212217   
max          89.233452           95.772251         97.895220   

       Incident_Count_30d  Change_Count_30d  Network_Latency  
count         5000.000000       5000.000000      5000.000000  
mean             1.028200          3.628400        12.002526  
std              1.348395          3.186525        12.803178  
min              0.000000          0.000000         0.001266  
25%              0.000000          1.000000         3.115540  
50%             

In [5]:
# Data Cleaning and Preprocessing
def clean_and_preprocess(self):
    """
    Clean and preprocess the CMDB data with advanced imputation and feature engineering.

    Returns:
    - cleaned_data: Processed DataFrame
    """
    print("\n🧹 Data Cleaning and Preprocessing...")
    self.cleaned_data = self.data.copy()

    # Handling missing values with KNN imputation for numerical columns
    numerical_cols = ['CPU_Utilization', 'Memory_Utilization', 'Disk_Utilization', 'Network_Latency']
    available_num_cols = [col for col in numerical_cols if col in self.cleaned_data.columns]
    if available_num_cols:
        from sklearn.impute import KNNImputer
        imputer = KNNImputer(n_neighbors=5)
        self.cleaned_data[available_num_cols] = imputer.fit_transform(self.cleaned_data[available_num_cols])

    # Mean imputation for documentation completeness
    if 'Documentation_Completeness' in self.cleaned_data.columns:
        self.cleaned_data['Documentation_Completeness'].fillna(
            self.cleaned_data['Documentation_Completeness'].mean(), inplace=True)

    # Handling missing owners
    if 'Owner' in self.cleaned_data.columns:
        self.cleaned_data['Owner'].fillna('UNASSIGNED', inplace=True)
        self.cleaned_data['Has_Owner'] = self.cleaned_data['Owner'] != 'UNASSIGNED'

    # Creating derived features
    if 'Days_Since_Update' in self.cleaned_data.columns:
        self.cleaned_data['Data_Freshness_Score'] = np.exp(-self.cleaned_data['Days_Since_Update'] / 30)
    if 'Days_Since_Scan' in self.cleaned_data.columns:
        self.cleaned_data['Scan_Freshness_Score'] = np.exp(-self.cleaned_data['Days_Since_Scan'] / 7)

    # Lifecycle stage
    def categorize_lifecycle(age_days):
        if pd.isna(age_days):
            return 'Unknown'
        if age_days < 90:
            return 'New'
        elif age_days < 365:
            return 'Active'
        elif age_days < 1095:
            return 'Mature'
        else:
            return 'Legacy'

    if 'Age_Days' in self.cleaned_data.columns:
        self.cleaned_data['Lifecycle_Stage'] = self.cleaned_data['Age_Days'].apply(categorize_lifecycle)

    print("Data cleaning completed")
    print(f"Records after cleaning: {len(self.cleaned_data)}")

    # Visualize lifecycle
    fig = px.histogram(self.cleaned_data, x='Lifecycle_Stage', title='Lifecycle Stage Distribution',
                      color='Lifecycle_Stage', color_discrete_sequence=px.colors.qualitative.Plotly)
    fig.update_layout(showlegend=False)
    fig.show()

    return self.cleaned_data

CMDBIntelligenceSystem.clean_and_preprocess = clean_and_preprocess

cmdb_system.clean_and_preprocess()


🧹 Data Cleaning and Preprocessing...
Data cleaning completed
Records after cleaning: 5000



A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





Unnamed: 0,CI_ID,CI_Type,Environment,Status,Business_Criticality,Location,Created_Date,Last_Updated,Last_Scanned,CPU_Utilization,...,Days_Since_Update,Days_Since_Scan,Compliance_Score,Documentation_Completeness,Owner,Dependency_Count,Has_Owner,Data_Freshness_Score,Scan_Freshness_Score,Lifecycle_Stage
0,CI_000001,Database,DR,Inactive,Medium,DataCenter_B,2024-03-15 16:30:07.332286,2024-10-15 16:30:07.332286,2025-04-12 16:30:07.332308,12.923686,...,253,74,0.765321,0.694302,Team_21,2,True,2.174953e-04,2.563816e-05,Mature
1,CI_000002,Application,Production,Active,Critical,DataCenter_B,2023-06-26 16:30:07.332588,2025-01-01 16:30:07.332588,2025-05-23 16:30:07.332602,10.663898,...,175,33,0.599421,0.797939,Team_5,4,True,2.928300e-03,8.966268e-03,Mature
2,CI_000003,Application,Production,Active,High,Hybrid,2023-09-21 16:30:07.332811,2023-09-25 16:30:07.332811,2025-03-28 16:30:07.332825,15.256549,...,639,89,0.728267,0.452908,Team_11,3,True,5.617299e-10,3.007848e-06,Mature
3,CI_000004,Application,Staging,Inactive,High,Cloud_AWS,2021-08-27 16:30:07.333010,2023-06-29 16:30:07.333010,2025-03-22 16:30:07.333022,19.616217,...,727,95,0.332145,0.797805,Team_20,2,True,2.989490e-11,1.276449e-06,Legacy
4,CI_000005,Service,Production,Inactive,Medium,DataCenter_A,2023-01-26 16:30:07.333183,2023-01-27 16:30:07.333183,2025-06-25 16:30:07.333194,39.576159,...,880,0,0.342349,0.527780,Team_8,2,True,1.822616e-13,1.000000e+00,Mature
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,CI_004996,Network_Device,Production,Active,Medium,OnPremise,2023-08-01 16:30:08.592101,2023-12-11 16:30:08.592101,2025-05-25 16:30:08.592121,6.506278,...,562,31,0.973001,0.691040,Team_6,2,True,7.315040e-09,1.193152e-02,Mature
4996,CI_004997,Server,Test,Deprecated,Low,Cloud_AWS,2024-01-18 16:30:08.592398,2024-11-12 16:30:08.592398,2025-05-08 16:30:08.592419,36.275581,...,225,48,0.782540,0.528396,Team_18,1,True,5.530844e-04,1.051915e-03,Mature
4997,CI_004998,Database,Staging,Active,Medium,DataCenter_B,2024-07-28 16:30:08.592708,2025-04-08 16:30:08.592708,2025-03-16 16:30:08.592728,21.876777,...,78,101,0.534650,0.446475,Team_8,2,True,7.427358e-02,5.416903e-07,Active
4998,CI_004999,Network_Device,Production,Inactive,Critical,Hybrid,2024-01-05 16:30:08.593144,2024-04-21 16:30:08.593144,2025-04-20 16:30:08.593167,16.845514,...,430,66,0.249746,0.737240,UNASSIGNED,2,False,5.958164e-07,8.039397e-05,Mature


In [6]:
# Health Score Calculation
def calculate_health_score(self):
    """
    Calculate comprehensive health scores for each CI.

    Returns:
    - health_scores: DataFrame with health metrics
    """
    print("\n💚 Calculating Health Scores...")
    df = self.cleaned_data.copy()

    # Performance health
    perf_cols = ['CPU_Utilization', 'Memory_Utilization', 'Disk_Utilization', 'Network_Latency']
    available_perf_cols = [col for col in perf_cols if col in df.columns]
    df['Perf_Health'] = 100
    for col in available_perf_cols:
        if col == 'Network_Latency':
            df['Perf_Health'] -= np.maximum(0, df[col] - 50) * 1.0
        else:
            df['Perf_Health'] -= np.maximum(0, df[col] - 70) * 1.5
    df['Perf_Health'] = np.clip(df['Perf_Health'], 0, 100)

    # Incident health
    if 'Incident_Count_30d' in df.columns:
        df['Incident_Health'] = 100 * np.exp(-df['Incident_Count_30d'] / 3)
    else:
        df['Incident_Health'] = 100

    # Freshness health
    freshness_cols = ['Data_Freshness_Score', 'Scan_Freshness_Score']
    available_freshness_cols = [col for col in freshness_cols if col in df.columns]
    df['Freshness_Health'] = df[available_freshness_cols].mean(axis=1) * 100 if available_freshness_cols else 100

    # Compliance and documentation health
    if 'Compliance_Score' in df.columns:
        df['Compliance_Health'] = df['Compliance_Score'] * 100
    else:
        df['Compliance_Health'] = 100
    if 'Documentation_Completeness' in df.columns:
        df['Doc_Health'] = df['Documentation_Completeness'] * 100
    else:
        df['Doc_Health'] = 100
    if 'Has_Owner' in df.columns:
        df['Owner_Health'] = df['Has_Owner'].astype(int) * 100
    else:
        df['Owner_Health'] = 100

    # Weighted overall health score
    weights = {
        'Perf_Health': 0.35,
        'Incident_Health': 0.25,
        'Freshness_Health': 0.15,
        'Compliance_Health': 0.15,
        'Doc_Health': 0.05,
        'Owner_Health': 0.05
    }

    df['Overall_Health_Score'] = sum(df[col] * weight for col, weight in weights.items())
    self.health_scores = df[['CI_ID'] + list(weights.keys()) + ['Overall_Health_Score']]
    self.cleaned_data = df

    print(f" Health scores calculated. Average health: {df['Overall_Health_Score'].mean():.1f}")

    # Interactive health score visualization
    fig = px.histogram(df, x='Overall_Health_Score', title='Overall Health Score Distribution',
                      nbins=30, color_discrete_sequence=['green'])
    fig.show()

    return self.health_scores


CMDBIntelligenceSystem.calculate_health_score = calculate_health_score

cmdb_system.calculate_health_score()


💚 Calculating Health Scores...
 Health scores calculated. Average health: 213018606.2


Unnamed: 0,CI_ID,Perf_Health,Incident_Health,Freshness_Health,Compliance_Health,Doc_Health,Owner_Health,Overall_Health_Score
0,CI_000001,100.000000,100.000000,1.215667e-02,76.532105,69.430175,100,7.995315e+01
1,CI_000002,100.000000,51.341712,5.947284e-01,59.942120,79.793937,100,6.590565e+01
2,CI_000003,100.000000,100.000000,1.504205e-04,72.826729,45.290831,100,7.818857e+01
3,CI_000004,100.000000,100.000000,6.382394e-05,33.214463,79.780467,100,7.397120e+01
4,CI_000005,100.000000,100.000000,5.000000e+01,34.234945,52.777979,100,8.027414e+01
...,...,...,...,...,...,...,...,...
4995,CI_004996,100.000000,100.000000,5.965765e-01,97.300085,69.103992,100,8.313970e+01
4996,CI_004997,100.000000,36.787944,8.024997e-02,78.253993,52.839567,100,6.358910e+01
4997,CI_004998,94.304212,51.341712,3.713706e+00,53.464973,44.647455,100,6.165108e+01
4998,CI_004999,100.000000,71.653131,4.049489e-03,24.974630,73.723969,0,6.034628e+01


In [7]:
# Risk Score Calculation
def calculate_risk_score(self):
    """
    Calculate risk scores based on multiple factors.

    Returns:
    - risk_scores: DataFrame with risk metrics
    """
    print("\n Calculating Risk Scores...")
    df = self.cleaned_data.copy()

    # Criticality risk
    criticality_map = {'Critical': 100, 'High': 75, 'Medium': 50, 'Low': 25}
    if 'Business_Criticality' in df.columns:
        df['Criticality_Risk'] = df['Business_Criticality'].map(criticality_map)
    else:
        df['Criticality_Risk'] = 50

    # Age risk
    if 'Age_Days' in df.columns:
        df['Age_Risk'] = np.where(
            df['Age_Days'] > 1095,
            50 + (df['Age_Days'] - 1095) / 10,
            np.where(df['Age_Days'] < 30, 40, 0)
        )
        df['Age_Risk'] = np.clip(df['Age_Risk'], 0, 100)
    else:
        df['Age_Risk'] = 0

    # Performance risk
    perf_cols = ['CPU_Utilization', 'Memory_Utilization', 'Disk_Utilization', 'Network_Latency']
    available_perf_cols = [col for col in perf_cols if col in df.columns]
    df['Performance_Risk'] = 0
    for col in available_perf_cols:
        if col == 'Network_Latency':
            df['Performance_Risk'] += np.maximum(0, df[col] - 50) * 1.0
        else:
            df['Performance_Risk'] += np.maximum(0, df[col] - 80) * 1.5
    df['Performance_Risk'] = np.clip(df['Performance_Risk'], 0, 100)

    # Change risk
    if 'Change_Count_30d' in df.columns:
        df['Change_Risk'] = np.where(
            df['Change_Count_30d'] > 10,
            50 + (df['Change_Count_30d'] - 10) * 3,
            np.where(df['Change_Count_30d'] == 0, 30, 0)
        )
        df['Change_Risk'] = np.clip(df['Change_Risk'], 0, 100)
    else:
        df['Change_Risk'] = 0

    # Staleness risk
    staleness_cols = ['Days_Since_Update', 'Days_Since_Scan']
    available_staleness_cols = [col for col in staleness_cols if col in df.columns]
    df['Staleness_Risk'] = 0
    if 'Days_Since_Update' in df.columns:
        df['Staleness_Risk'] += np.maximum(0, df['Days_Since_Update'] - 90) * 0.5
    if 'Days_Since_Scan' in df.columns:
        df['Staleness_Risk'] += np.maximum(0, df['Days_Since_Scan'] - 30) * 1.0
    df['Staleness_Risk'] = np.clip(df['Staleness_Risk'], 0, 100)

    # Compliance risk
    if 'Compliance_Score' in df.columns:
        df['Compliance_Risk'] = (1 - df['Compliance_Score']) * 100
    else:
        df['Compliance_Risk'] = 0

    # Overall risk score
    risk_weights = {
        'Criticality_Risk': 0.25,
        'Performance_Risk': 0.25,
        'Change_Risk': 0.15,
        'Age_Risk': 0.15,
        'Staleness_Risk': 0.1,
        'Compliance_Risk': 0.1
    }

    df['Overall_Risk_Score'] = sum(df[col] * weight for col, weight in risk_weights.items())
    self.risk_scores = df[['CI_ID'] + list(risk_weights.keys()) + ['Overall_Risk_Score']]
    self.cleaned_data = df

    print(f" Risk scores calculated. Average risk: {df['Overall_Risk_Score'].mean():.1f}")

    # Interactive risk score visualization
    fig = px.histogram(df, x='Overall_Risk_Score', title='Overall Risk Score Distribution',
                      nbins=30, color_discrete_sequence=['red'])
    fig.show()

    return self.risk_scores

CMDBIntelligenceSystem.calculate_risk_score = calculate_risk_score


cmdb_system.calculate_risk_score()


 Calculating Risk Scores...
 Risk scores calculated. Average risk: 29.9


Unnamed: 0,CI_ID,Criticality_Risk,Performance_Risk,Change_Risk,Age_Risk,Staleness_Risk,Compliance_Risk,Overall_Risk_Score
0,CI_000001,50,0.0,0,0.0,100.0,23.467895,24.846789
1,CI_000002,100,0.0,30,0.0,45.5,40.057880,38.055788
2,CI_000003,75,0.0,0,0.0,100.0,27.173271,31.467327
3,CI_000004,75,0.0,56,80.3,100.0,66.785537,55.873554
4,CI_000005,50,0.0,0,0.0,100.0,65.765055,29.076505
...,...,...,...,...,...,...,...,...
4995,CI_004996,50,0.0,0,0.0,100.0,2.699915,22.769992
4996,CI_004997,25,0.0,0,0.0,85.5,21.746007,16.974601
4997,CI_004998,50,0.0,0,0.0,71.0,46.535027,24.253503
4998,CI_004999,100,0.0,0,0.0,100.0,75.025370,42.502537


In [8]:
# Behavioral Clustering
def perform_clustering(self):
    """
    Perform unsupervised clustering to identify CI behavior patterns.

    Returns:
    - clusters: Array of cluster assignments
    - cluster_analysis: DataFrame with cluster statistics
    """
    print("\n Performing Behavioral Clustering...")

    # Select features for clustering
    cluster_features = [
        'CPU_Utilization', 'Memory_Utilization', 'Disk_Utilization', 'Network_Latency',
        'Incident_Count_30d', 'Change_Count_30d', 'Age_Days',
        'Overall_Health_Score', 'Overall_Risk_Score', 'Documentation_Completeness', 'Dependency_Count'
    ]
    available_features = [col for col in cluster_features if col in self.cleaned_data.columns]
    X = self.cleaned_data[available_features].copy()

    # Handle missing values
    imputer = SimpleImputer(strategy='median')
    X = imputer.fit_transform(X)

    # Standard scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Elbow method for optimal clusters
    inertias = []
    K_range = range(2, 11)
    for k in K_range:
        kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
        kmeans.fit(X_scaled)
        inertias.append(kmeans.inertia_)

    # Interactive elbow plot
    fig = px.line(x=K_range, y=inertias, title='Elbow Method for Optimal Clusters',
                  labels={'x': 'Number of Clusters', 'y': 'Inertia'})
    fig.show()

    # Use DBSCAN as an alternative clustering method
    from sklearn.cluster import DBSCAN
    dbscan = DBSCAN(eps=0.5, min_samples=5)
    clusters = dbscan.fit_predict(X_scaled)
    self.cleaned_data['Cluster'] = clusters

    # Fallback to KMeans if DBSCAN fails (e.g., too many noise points)
    if len(set(clusters)) <= 2:  # Including -1 for noise
        print("DBSCAN failed to find meaningful clusters, switching to KMeans")
        optimal_k = 5
        kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
        clusters = kmeans.fit_predict(X_scaled)
        self.cleaned_data['Cluster'] = clusters

    # Cluster analysis
    cluster_analysis = self.cleaned_data.groupby('Cluster').agg({
        'Overall_Health_Score': 'mean',
        'Overall_Risk_Score': 'mean',
        'CPU_Utilization': 'mean',
        'Memory_Utilization': 'mean',
        'Incident_Count_30d': 'mean',
        'Age_Days': 'mean',
        'CI_Type': lambda x: x.mode().iloc[0] if not x.empty else 'Unknown',
        'Business_Criticality': lambda x: x.mode().iloc[0] if not x.empty else 'Unknown'
    }).round(2)

    # Label clusters
    cluster_labels = {}
    for cluster_id in cluster_analysis.index:
        health = cluster_analysis.loc[cluster_id, 'Overall_Health_Score']
        risk = cluster_analysis.loc[cluster_id, 'Overall_Risk_Score']
        age = cluster_analysis.loc[cluster_id, 'Age_Days']
        if health > 80 and risk < 30:
            cluster_labels[cluster_id] = "High-Performance Stable"
        elif risk > 70:
            cluster_labels[cluster_id] = "Problem Assets"
        elif age > 1000:
            cluster_labels[cluster_id] = "Legacy High-Risk"
        elif age < 180:
            cluster_labels[cluster_id] = "New Development"
        else:
            cluster_labels[cluster_id] = "Standard Production"

    self.cleaned_data['Cluster_Label'] = self.cleaned_data['Cluster'].map(cluster_labels)

    print(f"Identified {len(set(clusters))} distinct CI behavior clusters")
    print("\nCluster Analysis:")
    print(cluster_analysis)

    # Interactive PCA visualization
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], color=self.cleaned_data['Cluster_Label'],
                     title='Cluster Visualization (PCA)',
                     labels={'x': f'PC1 ({pca.explained_variance_ratio_[0]:.1%} variance)',
                             'y': f'PC2 ({pca.explained_variance_ratio_[1]:.1%} variance)'})
    fig.show()

    return clusters, cluster_analysis


CMDBIntelligenceSystem.perform_clustering = perform_clustering


clusters, cluster_analysis = cmdb_system.perform_clustering()


 Performing Behavioral Clustering...


DBSCAN failed to find meaningful clusters, switching to KMeans
Identified 5 distinct CI behavior clusters

Cluster Analysis:
         Overall_Health_Score  Overall_Risk_Score  CPU_Utilization  \
Cluster                                                              
0                1.237520e+03               38.28            24.42   
1                1.903927e+08               23.39            23.93   
2                5.086502e+06               33.73            23.32   
3                1.044579e+08               29.28            39.12   
4                8.491510e+10               29.60            28.52   

         Memory_Utilization  Incident_Count_30d  Age_Days      CI_Type  \
Cluster                                                                  
0                     35.87                1.23   1127.70       Server   
1                     35.33                0.77    410.09       Server   
2                     34.99                1.25    940.41       Server   
3             

In [9]:
# Anomaly Detection
def detect_anomalies(self):
    """
    Detect anomalous CIs using an ensemble of Isolation Forest and One-Class SVM.

    Returns:
    - anomalies: DataFrame of anomalous CIs
    """
    print("\n Detecting Anomalies...")

    # Features for anomaly detection
    anomaly_features = [
        'CPU_Utilization', 'Memory_Utilization', 'Disk_Utilization', 'Network_Latency',
        'Incident_Count_30d', 'Change_Count_30d', 'Days_Since_Update'
    ]
    available_features = [col for col in anomaly_features if col in self.cleaned_data.columns]
    X = self.cleaned_data[available_features].copy()

    # Handle missing values
    imputer = SimpleImputer(strategy='median')
    X = imputer.fit_transform(X)

    # Standard scaling
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Ensemble: Isolation Forest + One-Class SVM
    iso_forest = IsolationForest(contamination=0.05, random_state=42)
    iso_labels = iso_forest.fit_predict(X_scaled)

    from sklearn.svm import OneClassSVM
    svm = OneClassSVM(nu=0.05, kernel='rbf', gamma='auto')
    svm_labels = svm.fit_predict(X_scaled)

    # Combine predictions (anomaly if either model flags it)
    self.cleaned_data['Is_Anomaly'] = (iso_labels == -1) | (svm_labels == -1)
    anomalies = self.cleaned_data[self.cleaned_data['Is_Anomaly']]

    print(f" Detected {len(anomalies)} anomalous CIs ({len(anomalies)/len(self.cleaned_data)*100:.1f}%)")

    # Analyze anomaly patterns
    print("\nAnomaly Patterns:")
    print(f"- High-risk anomalies: {len(anomalies[anomalies['Overall_Risk_Score'] > 70])}")
    print(f"- Critical system anomalies: {len(anomalies[anomalies['Business_Criticality'] == 'Critical'])}")
    print(f"- Production anomalies: {len(anomalies[anomalies['Environment'] == 'Production'])}")

    # Interactive anomaly visualization
    normal_data = self.cleaned_data[~self.cleaned_data['Is_Anomaly']]
    anomaly_data = self.cleaned_data[self.cleaned_data['Is_Anomaly']]
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=normal_data['CPU_Utilization'], y=normal_data['Memory_Utilization'],
                             mode='markers', name='Normal', marker=dict(color='blue', size=5)))
    fig.add_trace(go.Scatter(x=anomaly_data['CPU_Utilization'], y=anomaly_data['Memory_Utilization'],
                             mode='markers', name='Anomaly', marker=dict(color='red', size=10, symbol='x')))
    fig.update_layout(title='Anomaly Detection Results',
                      xaxis_title='CPU Utilization %',
                      yaxis_title='Memory Utilization %',
                      showlegend=True)
    fig.show()

    return anomalies

CMDBIntelligenceSystem.detect_anomalies = detect_anomalies


anomalies = cmdb_system.detect_anomalies()


 Detecting Anomalies...
 Detected 337 anomalous CIs (6.7%)

Anomaly Patterns:
- High-risk anomalies: 0
- Critical system anomalies: 86
- Production anomalies: 99


In [10]:
#  Generate Insights
def generate_insights(self):
    """
    Generate actionable business insights.

    Returns:
    - insights: Dictionary of key metrics
    """
    print("\n EXECUTIVE INSIGHTS ")
    print("=" * 50)

    total_cis = len(self.cleaned_data)
    critical_cis = len(self.cleaned_data[self.cleaned_data['Business_Criticality'] == 'Critical'])
    high_risk_cis = len(self.cleaned_data[self.cleaned_data['Overall_Risk_Score'] > 70])
    poor_health_cis = len(self.cleaned_data[self.cleaned_data['Overall_Health_Score'] < 60])
    stale_data = len(self.cleaned_data[self.cleaned_data['Days_Since_Update'] > 90])
    missing_owners = len(self.cleaned_data[self.cleaned_data['Owner'] == 'UNASSIGNED'])

    print(f" CMDB HEALTH DASHBOARD:")
    print(f"   Total CIs: {total_cis:,}")
    print(f"   Critical Systems: {critical_cis:,} ({critical_cis/total_cis*100:.1f}%)")
    print(f"   High-Risk Systems: {high_risk_cis:,} ({high_risk_cis/total_cis*100:.1f}%)")
    print(f"   Poor Health Systems: {poor_health_cis:,} ({poor_health_cis/total_cis*100:.1f}%)")
    print(f"   Stale Records (>90 days): {stale_data:,} ({stale_data/total_cis*100:.1f}%)")
    print(f"   Missing Owners: {missing_owners:,} ({missing_owners/total_cis*100:.1f}%)")

    # Interactive dashboard
    fig = make_subplots(rows=1, cols=2, subplot_titles=('Health Score Distribution', 'Risk Score Distribution'))
    fig.add_trace(go.Histogram(x=self.cleaned_data['Overall_Health_Score'], name='Health Score',
                               marker_color='green'), row=1, col=1)
    fig.add_trace(go.Histogram(x=self.cleaned_data['Overall_Risk_Score'], name='Risk Score',
                               marker_color='red'), row=1, col=2)
    fig.update_layout(title_text='CMDB Health and Risk Dashboard', showlegend=False)
    fig.show()

    return {
        'total_cis': total_cis,
        'high_risk_count': high_risk_cis,
        'poor_health_count': poor_health_cis,
        'stale_data_count': stale_data,
        'missing_owners': missing_owners
    }

CMDBIntelligenceSystem.generate_insights = generate_insights

insights = cmdb_system.generate_insights()


 EXECUTIVE INSIGHTS 
 CMDB HEALTH DASHBOARD:
   Total CIs: 5,000
   Critical Systems: 751 (15.0%)
   High-Risk Systems: 0 (0.0%)
   Poor Health Systems: 297 (5.9%)
   Stale Records (>90 days): 3,458 (69.2%)
   Missing Owners: 762 (15.2%)


In [11]:
#Generate Recommendations
def generate_recommendations(self):
    """
    Generate specific actionable recommendations.

    Returns:
    - recommendations: List of recommendation dictionaries
    """
    print("\n ACTIONABLE RECOMMENDATIONS")
    print("=" * 50)

    recommendations = []
    df = self.cleaned_data

    # Data quality
    stale_pct = len(df[df['Days_Since_Update'] > 90]) / len(df) * 100
    if stale_pct > 15:
        recommendations.append({
            'priority': 'HIGH',
            'category': 'Data Quality',
            'action': 'Implement automated CMDB scanning',
            'details': f'{stale_pct:.1f}% of records are stale (>90 days)',
            'impact': 'Improve data accuracy by 80%'
        })

    # Security
    high_risk_critical = len(df[(df['Overall_Risk_Score'] > 70) & (df['Business_Criticality'] == 'Critical')])
    if high_risk_critical > 0:
        recommendations.append({
            'priority': 'CRITICAL',
            'category': 'Risk Management',
            'action': 'Immediate review of high-risk critical systems',
            'details': f'{high_risk_critical} critical systems with risk score >70',
            'impact': 'Prevent potential production outages'
        })

    # Performance
    high_util_systems = len(df[(df['CPU_Utilization'] > 85) | (df['Memory_Utilization'] > 90)])
    if high_util_systems > 0:
        recommendations.append({
            'priority': 'MEDIUM',
            'category': 'Performance',
            'action': 'Scale or optimize high-utilization systems',
            'details': f'{high_util_systems} systems with extreme resource usage',
            'impact': 'Reduce performance degradation risk'
        })

    # Display recommendations
    for i, rec in enumerate(recommendations, 1):
        print(f"\n{i}. [{rec['priority']}] {rec['category']}")
        print(f"   Action: {rec['action']}")
        print(f"   Details: {rec['details']}")
        print(f"   Impact: {rec['impact']}")

    # Interactive recommendation table
    fig = go.Figure(data=[go.Table(
        header=dict(values=['Priority', 'Category', 'Action', 'Details', 'Impact'],
                    fill_color='paleturquoise',
                    align='left'),
        cells=dict(values=[list(map(lambda x: x['priority'], recommendations)),
                           list(map(lambda x: x['category'], recommendations)),
                           list(map(lambda x: x['action'], recommendations)),
                           list(map(lambda x: x['details'], recommendations)),
                           list(map(lambda x: x['impact'], recommendations))],
                   fill_color='lavender',
                   align='left'))
    ])
    fig.update_layout(title='Actionable Recommendations')
    fig.show()

    return recommendations


CMDBIntelligenceSystem.generate_recommendations = generate_recommendations

recommendations = cmdb_system.generate_recommendations()


 ACTIONABLE RECOMMENDATIONS

1. [HIGH] Data Quality
   Action: Implement automated CMDB scanning
   Details: 69.2% of records are stale (>90 days)
   Impact: Improve data accuracy by 80%

2. [MEDIUM] Performance
   Action: Scale or optimize high-utilization systems
   Details: 20 systems with extreme resource usage
   Impact: Reduce performance degradation risk


In [12]:
  # Comprehensive Visualizations
  def create_visualizations(self):
      """
      Create comprehensive interactive visualizations using Plotly.
      """
      print("\n Generating Visualizations...")

      df = self.cleaned_data
      fig = make_subplots(rows=3, cols=2,
                          subplot_titles=(
                              'Health vs Risk', 'CI Type Distribution',
                              'Environment vs Criticality', 'Cluster Visualization (PCA)',
                              'Anomaly Detection', 'Lifecycle Stage Distribution'
                          ),
                          specs=[[{"type": "scatter"}, {"type": "pie"}],
                                 [{"type": "heatmap"}, {"type": "scatter"}],
                                 [{"type": "scatter"}, {"type": "histogram"}]])

      # Health vs Risk
      fig.add_trace(go.Scatter(x=df['Overall_Risk_Score'], y=df['Overall_Health_Score'],
                               mode='markers', marker=dict(color=df['Cluster'], colorscale='Viridis'),
                               text=df['CI_ID'], name='CIs'),
                    row=1, col=1)
      fig.add_hline(y=70, line_dash="dash", line_color="red", row=1, col=1)
      fig.add_vline(x=50, line_dash="dash", line_color="red", row=1, col=1)

      # CI Type Distribution
      ci_counts = df['CI_Type'].value_counts()
      fig.add_trace(go.Pie(labels=ci_counts.index, values=ci_counts.values, name='CI Types'),
                    row=1, col=2)

      # Environment vs Criticality Heatmap
      env_crit = pd.crosstab(df['Environment'], df['Business_Criticality'])
      fig.add_trace(go.Heatmap(z=env_crit.values, x=env_crit.columns, y=env_crit.index,
                               colorscale='YlOrRd', showscale=True),
                    row=2, col=1)

      # Cluster Visualization (PCA)
      cluster_features = ['CPU_Utilization', 'Memory_Utilization', 'Overall_Health_Score', 'Overall_Risk_Score']
      available_features = [col for col in cluster_features if col in df.columns]
      X_cluster = df[available_features]
      pca = PCA(n_components=2)
      X_pca = pca.fit_transform(StandardScaler().fit_transform(X_cluster))
      fig.add_trace(go.Scatter(x=X_pca[:, 0], y=X_pca[:, 1], mode='markers',
                               marker=dict(color=df['Cluster'], colorscale='Viridis'),
                               text=df['CI_ID'], name='Clusters'),
                    row=2, col=2)

      # Anomaly Detection
      normal_data = df[~df['Is_Anomaly']]
      anomaly_data = df[df['Is_Anomaly']]
      fig.add_trace(go.Scatter(x=normal_data['CPU_Utilization'], y=normal_data['Memory_Utilization'],
                               mode='markers', name='Normal', marker=dict(color='blue', size=5)),
                    row=3, col=1)
      fig.add_trace(go.Scatter(x=anomaly_data['CPU_Utilization'], y=anomaly_data['Memory_Utilization'],
                               mode='markers', name='Anomaly', marker=dict(color='red', size=10, symbol='x')),
                    row=3, col=1)

      # Lifecycle Stage Distribution
      fig.add_trace(go.Histogram(x=df['Lifecycle_Stage'], name='Lifecycle Stages',
                                 marker_color='steelblue'),
                    row=3, col=2)

      fig.update_layout(height=1200, width=1200, title_text="CMDB Analysis Dashboard", showlegend=True)
      fig.show()
      fig.write_html('cmdb_analysis_dashboard.html')

      print(" Dashboard saved as 'cmdb_analysis_dashboard.html'")

  CMDBIntelligenceSystem.create_visualizations = create_visualizations

  cmdb_system.create_visualizations()


 Generating Visualizations...


 Dashboard saved as 'cmdb_analysis_dashboard.html'


In [13]:
#  Run Complete Analysis
def run_complete_analysis(self):
    print(" CMDB Deep Intelligence System")
    print("=" * 60)

    results = {}
    results['data'] = self.generate_cmdb_data() if self.data is None else self.data
    results['quality_report'] = self.profile_data()
    results['cleaned_data'] = self.clean_and_preprocess()
    results['health_scores'] = self.calculate_health_score()
    results['risk_scores'] = self.calculate_risk_score()
    results['clusters'], results['cluster_analysis'] = self.perform_clustering()
    results['anomalies'] = self.detect_anomalies()
    results['insights'] = self.generate_insights()
    results['recommendations'] = self.generate_recommendations()
    self.create_visualizations()

    print("\n ANALYSIS COMPLETE!")
    return results


CMDBIntelligenceSystem.run_complete_analysis = run_complete_analysis


results = cmdb_system.run_complete_analysis()

 CMDB Deep Intelligence System

 CMDB Data Profiling Report
Total Records: 5,000
Total Columns: 22
Memory Usage: 3.5 MB

 Data Quality Assessment:
                            Missing_Count  Missing_Percentage
Owner                                 762               15.24
Documentation_Completeness            675               13.50
CPU_Utilization                       522               10.44
Memory_Utilization                    407                8.14



 Categorical Distributions:

CI_Type:
CI_Type
Server            1222
Application        964
Database           799
Network_Device     703
Storage            542
Name: count, dtype: int64



Environment:
Environment
Production     1785
Development     994
Staging         976
Test            718
DR              527
Name: count, dtype: int64



Status:
Status
Active         3253
Deprecated      527
Inactive        492
Maintenance     471
Planned         257
Name: count, dtype: int64



Business_Criticality:
Business_Criticality
Medium      1985
High        1511
Low          753
Critical     751
Name: count, dtype: int64



Location:
Location
OnPremise       871
Cloud_Azure     859
DataCenter_A    825
Cloud_AWS       825
DataCenter_B    812
Name: count, dtype: int64



 Numerical Metrics Summary:
       CPU_Utilization  Memory_Utilization  Disk_Utilization  \
count      4478.000000         4593.000000       5000.000000   
mean         26.849781           39.668211         28.197721   
std          15.624963           17.981218         20.462893   
min           0.362601            0.669654          0.244550   
25%          14.635442           25.851862         12.858661   
50%          24.619506           38.082685         22.322626   
75%          36.442580           51.702212         38.212217   
max          89.233452           95.772251         97.895220   

       Incident_Count_30d  Change_Count_30d  Network_Latency  
count         5000.000000       5000.000000      5000.000000  
mean             1.028200          3.628400        12.002526  
std              1.348395          3.186525        12.803178  
min              0.000000          0.000000         0.001266  
25%              0.000000          1.000000         3.115540  
50%             


🧹 Data Cleaning and Preprocessing...
Data cleaning completed
Records after cleaning: 5000



A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.






💚 Calculating Health Scores...
 Health scores calculated. Average health: 213018606.2



 Calculating Risk Scores...
 Risk scores calculated. Average risk: 29.9



 Performing Behavioral Clustering...


DBSCAN failed to find meaningful clusters, switching to KMeans
Identified 5 distinct CI behavior clusters

Cluster Analysis:
         Overall_Health_Score  Overall_Risk_Score  CPU_Utilization  \
Cluster                                                              
0                1.237520e+03               38.28            24.42   
1                1.903927e+08               23.39            23.93   
2                5.086502e+06               33.73            23.32   
3                1.044579e+08               29.28            39.12   
4                8.491510e+10               29.60            28.52   

         Memory_Utilization  Incident_Count_30d  Age_Days      CI_Type  \
Cluster                                                                  
0                     35.87                1.23   1127.70       Server   
1                     35.33                0.77    410.09       Server   
2                     34.99                1.25    940.41       Server   
3             


 Detecting Anomalies...
 Detected 337 anomalous CIs (6.7%)

Anomaly Patterns:
- High-risk anomalies: 0
- Critical system anomalies: 86
- Production anomalies: 99



 EXECUTIVE INSIGHTS 
 CMDB HEALTH DASHBOARD:
   Total CIs: 5,000
   Critical Systems: 751 (15.0%)
   High-Risk Systems: 0 (0.0%)
   Poor Health Systems: 297 (5.9%)
   Stale Records (>90 days): 3,458 (69.2%)
   Missing Owners: 762 (15.2%)



 ACTIONABLE RECOMMENDATIONS

1. [HIGH] Data Quality
   Action: Implement automated CMDB scanning
   Details: 69.2% of records are stale (>90 days)
   Impact: Improve data accuracy by 80%

2. [MEDIUM] Performance
   Action: Scale or optimize high-utilization systems
   Details: 20 systems with extreme resource usage
   Impact: Reduce performance degradation risk



 Generating Visualizations...


 Dashboard saved as 'cmdb_analysis_dashboard.html'

 ANALYSIS COMPLETE!


In [14]:
#from google.colab import files
#files.download('cmdb_analysis_dashboard.html')