In [None]:
import pandas as pd
import numpy as np
from sklearn.covariance import GraphicalLasso
import matplotlib.pyplot as plt
import networkx as nx

# Load the dataset.
# NOTE: this is a Google Colab / Drive location; change it to your own path.
file_path = '/content/drive/MyDrive/Colab Notebooks/GGM模型/data/整理数据.xlsx'
# sheet_name=None returns a dict of DataFrames keyed by sheet name; only
# 'Sheet1' is used below.
data = pd.read_excel(file_path, sheet_name=None)
data_france = data['Sheet1'][data['Sheet1']['country_name'] == 'France']
data_england = data['Sheet1'][data['Sheet1']['country_name'] == 'England']


# Variables (including the year) that enter the graphical model.
variables_with_year = ['city_latitude', 'city_longitude', 'year', 'riot_type_hiscod_num']

# Keep only rows where every selected variable is present.
data_france_with_year = data_france[variables_with_year].dropna()
data_england_with_year = data_england[variables_with_year].dropna()

# Fit one Gaussian graphical model (graphical lasso) per country.
# NOTE(review): the columns are fitted on their raw scales (degrees, years,
# category codes). The effect of the L1 penalty `alpha` presumably depends on
# those scales -- confirm whether standardising the columns first is intended.
model_france_with_year = GraphicalLasso(alpha=0.1)
model_england_with_year = GraphicalLasso(alpha=0.1)

model_france_with_year.fit(data_france_with_year)
model_england_with_year.fit(data_england_with_year)

# Estimated precision (inverse covariance) matrices.
precision_matrix_france_with_year = model_france_with_year.precision_
precision_matrix_england_with_year = model_england_with_year.precision_

# The inverse covariance matrix IS the precision matrix. Re-inverting
# `covariance_` with np.linalg.inv is redundant and replaces the exact zeros
# produced by the L1 penalty with round-off noise, so the estimator's own
# `precision_` is reused (copied so the two names stay independent arrays).
inverse_covariance_matrix_france_with_year = precision_matrix_france_with_year.copy()
inverse_covariance_matrix_england_with_year = precision_matrix_england_with_year.copy()



# Wrap the matrices in labelled DataFrames (rows/columns follow the fitted column order).
variables = list(variables_with_year)

inverse_cov_df_france = pd.DataFrame(inverse_covariance_matrix_france_with_year, index=variables, columns=variables)
inverse_cov_df_england = pd.DataFrame(inverse_covariance_matrix_england_with_year, index=variables, columns=variables)

# Save both matrices to one Excel workbook, one sheet per country.
output_path = '/content/drive/MyDrive/Colab Notebooks/GGM模型/GGM模型inverse_covariance_matrices.xlsx'
with pd.ExcelWriter(output_path) as writer:
    inverse_cov_df_france.to_excel(writer, sheet_name='France (With Year)')
    inverse_cov_df_england.to_excel(writer, sheet_name='England (With Year)')

# Report where the workbook was written.
print(f'Excel file saved to {output_path}')




Excel file saved to /content/drive/MyDrive/Colab Notebooks/GGM模型/GGM模型inverse_covariance_matrices.xlsx


In [None]:
import pandas as pd
import numpy as np
from sklearn.covariance import GraphicalLasso
from geopy.distance import great_circle

# Read the workbook (every sheet, as a dict keyed by sheet name).
# NOTE: written for Google Colab / Drive; replace with your own local path.
file_path = '/content/drive/MyDrive/Colab Notebooks/GGM模型/data/整理数据.xlsx'
data = pd.read_excel(file_path, sheet_name=None)

# Per-country subsets of 'Sheet1', keeping only rows that have both coordinates.
data_france = (
    data['Sheet1']
    .loc[lambda frame: frame['country_name'] == 'France']
    .dropna(subset=['city_latitude', 'city_longitude'])
)
data_england = (
    data['Sheet1']
    .loc[lambda frame: frame['country_name'] == 'England']
    .dropna(subset=['city_latitude', 'city_longitude'])
)

# Pairwise great-circle distances between all rows of a data set.
def compute_distance_matrix(data):
    """Return the n x n matrix of great-circle distances (in kilometres).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the numeric columns 'city_latitude' and 'city_longitude',
        in degrees.

    Returns
    -------
    numpy.ndarray
        Array of shape (n, n) where entry [i, j] is the distance in km between
        row i and row j of `data`. The diagonal is zero. An empty frame gives
        an array of shape (0, 0).

    Notes
    -----
    The previous implementation called ``geopy.distance.great_circle`` once per
    pair from a nested Python loop (n * n calls). This version evaluates one
    whole row at a time with NumPy, using the spherical atan2 formula and the
    mean Earth radius of 6371.009 km, which is intended to match geopy's
    default -- TODO confirm against the installed geopy version.
    Unlike geopy, no range validation of the coordinates is performed and NaN
    coordinates propagate as NaN; the calling code drops missing coordinates
    before calling this function.
    """
    earth_radius_km = 6371.009

    coordinates = np.radians(
        data[['city_latitude', 'city_longitude']].to_numpy(dtype=float)
    )
    lat = coordinates[:, 0]
    lng = coordinates[:, 1]
    # Trigonometric terms that do not depend on the reference point are hoisted.
    sin_lat = np.sin(lat)
    cos_lat = np.cos(lat)

    n = coordinates.shape[0]
    distance_matrix = np.zeros((n, n))
    for i in range(n):
        # Distances from point i to every point, computed in one vectorized step
        # so temporaries stay O(n) instead of O(n^2).
        delta_lng = lng - lng[i]
        cos_delta = np.cos(delta_lng)
        sin_delta = np.sin(delta_lng)
        numerator = np.hypot(
            cos_lat * sin_delta,
            cos_lat[i] * sin_lat - sin_lat[i] * cos_lat * cos_delta,
        )
        denominator = sin_lat[i] * sin_lat + cos_lat[i] * cos_lat * cos_delta
        distance_matrix[i] = earth_radius_km * np.arctan2(numerator, denominator)
    return distance_matrix

# Pairwise distance matrices for the French and English events; each event's
# distances are summarised below by their mean.
distance_matrix_france = compute_distance_matrix(data_france)
distance_matrix_england = compute_distance_matrix(data_england)

# Label the matrices with the original row index so they can be joined back.
distance_df_france = pd.DataFrame(distance_matrix_france, index=data_france.index, columns=data_france.index)
distance_df_england = pd.DataFrame(distance_matrix_england, index=data_england.index, columns=data_england.index)

# Add each event's mean distance to all events of the same country as a new variable.
# NOTE(review): the row mean includes the event's distance to itself -- confirm
# that this is the intended definition of 'avg_distance'.
data_france_with_distance = data_france.join(distance_df_france.mean(axis=1).rename('avg_distance'))
data_england_with_distance = data_england.join(distance_df_england.mean(axis=1).rename('avg_distance'))

# Variables (including the distance summary) that enter the graphical model.
variables_with_distance = ['city_latitude', 'city_longitude', 'year', 'riot_type_hiscod_num', 'avg_distance']

# Keep only rows where every selected variable is present.
data_france_with_distance = data_france_with_distance[variables_with_distance].dropna()
data_england_with_distance = data_england_with_distance[variables_with_distance].dropna()

# Fit one Gaussian graphical model (graphical lasso) per country.
# NOTE(review): the columns are fitted on their raw scales (degrees, years,
# category codes, kilometres). The effect of the L1 penalty `alpha` presumably
# depends on those scales -- confirm whether standardising first is intended.
model_france_with_distance = GraphicalLasso(alpha=0.1)
model_england_with_distance = GraphicalLasso(alpha=0.1)

model_france_with_distance.fit(data_france_with_distance)
model_england_with_distance.fit(data_england_with_distance)

# Estimated precision (inverse covariance) matrices.
precision_matrix_france_with_distance = model_france_with_distance.precision_
precision_matrix_england_with_distance = model_england_with_distance.precision_

# The inverse covariance matrix IS the precision matrix. Re-inverting
# `covariance_` with np.linalg.inv is redundant and replaces the exact zeros
# produced by the L1 penalty with round-off noise, so the estimator's own
# `precision_` is reused (copied so the two names stay independent arrays).
inverse_covariance_matrix_france_with_distance = precision_matrix_france_with_distance.copy()
inverse_covariance_matrix_england_with_distance = precision_matrix_england_with_distance.copy()

# Wrap the matrices in labelled DataFrames (rows/columns follow the fitted column order).
inverse_cov_df_france = pd.DataFrame(inverse_covariance_matrix_france_with_distance, index=variables_with_distance, columns=variables_with_distance)
inverse_cov_df_england = pd.DataFrame(inverse_covariance_matrix_england_with_distance, index=variables_with_distance, columns=variables_with_distance)

# Save the England and France matrices to one Excel workbook, one sheet per country.
# NOTE(review): the file name starts with 'GMM' while its folder is 'GGM模型' --
# possibly a typo; left unchanged so anything reading this file keeps working.
output_path = '/content/drive/MyDrive/Colab Notebooks/GGM模型/GMM模型inverse_covariance_matrices_with_distance.xlsx'
with pd.ExcelWriter(output_path) as writer:
    inverse_cov_df_france.to_excel(writer, sheet_name='France (With Distance)')
    inverse_cov_df_england.to_excel(writer, sheet_name='England (With Distance)')

# Report where the workbook was written.
print(f'Excel file saved to {output_path}')




Excel file saved to /content/drive/MyDrive/Colab Notebooks/GGM模型/GMM模型inverse_covariance_matrices_with_distance.xlsx
