Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
vrouter: implement packet handling on Hyper-V
Changes required to support Contrail on Windows: - windows/vr_nbl.c - code responsible for handling incoming NET_BUFFER_LIST (Windows' internal packet representation) - windows/vr_host_interface.c - implementation of vr_host_interface_ops callback layer on Windows Initial work: https://github.com/codilime/contrail-vrouter/commits/windows Change-Id: Icadff011051c0f44719d1a08e2e8042fa732e7b8 Partial-Bug: #1734699
- Loading branch information
Dariusz Sosnowski
committed
Dec 11, 2017
1 parent
7e1320e
commit e774954
Showing
2 changed files
with
847 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,358 @@ | ||
/* | ||
* Copyright (c) 2017 Juniper Networks, Inc. All rights reserved. | ||
*/ | ||
#include "precomp.h" | ||
|
||
#include "vr_interface.h" | ||
#include "vr_packet.h" | ||
#include "vr_windows.h" | ||
#include "vrouter.h" | ||
#include "windows_devices.h" | ||
#include "windows_nbl.h" | ||
|
||
static NDIS_MUTEX win_if_mutex; | ||
|
||
void | ||
win_if_lock(void) | ||
{ | ||
NDIS_WAIT_FOR_MUTEX(&win_if_mutex); | ||
} | ||
|
||
void | ||
win_if_unlock(void) | ||
{ | ||
NDIS_RELEASE_MUTEX(&win_if_mutex); | ||
} | ||
|
||
static int | ||
win_if_add(struct vr_interface* vif) | ||
{ | ||
if (vif->vif_type == VIF_TYPE_STATS) | ||
return 0; | ||
|
||
if (vif->vif_name[0] == '\0') | ||
return -ENODEV; | ||
|
||
// Unlike FreeBSD/Linux, we don't have to register handlers here | ||
|
||
return 0; | ||
} | ||
|
||
static int | ||
win_if_add_tap(struct vr_interface* vif) | ||
{ | ||
UNREFERENCED_PARAMETER(vif); | ||
// NOOP - no bridges on Windows | ||
return 0; | ||
} | ||
|
||
static int | ||
win_if_del(struct vr_interface *vif) | ||
{ | ||
UNREFERENCED_PARAMETER(vif); | ||
return 0; | ||
} | ||
|
||
static int | ||
win_if_del_tap(struct vr_interface *vif) | ||
{ | ||
UNREFERENCED_PARAMETER(vif); | ||
// NOOP - no bridges on Windows; most *_drv_del function which call if_del_tap | ||
// also call if_del | ||
return 0; | ||
} | ||
|
||
static uint16_t | ||
trim_csum(uint32_t csum) | ||
{ | ||
while (csum & 0xffff0000) | ||
csum = (csum >> 16) + (csum & 0x0000ffff); | ||
|
||
return (uint16_t)csum; | ||
} | ||
|
||
static uint16_t | ||
calc_csum(uint8_t* ptr, size_t size) | ||
{ | ||
uint32_t csum = 0; | ||
// Checksum based on payload | ||
for (int i = 0; i < size; i++) | ||
{ | ||
if (i & 1) | ||
csum += ptr[i]; | ||
else | ||
csum += ptr[i] << 8; | ||
} | ||
|
||
return trim_csum(csum); | ||
} | ||
|
||
static void | ||
fix_ip_csum_at_offset(struct vr_packet *pkt, unsigned offset) | ||
{ | ||
struct vr_ip *iph; | ||
|
||
ASSERT(0 < offset); | ||
|
||
iph = (struct vr_ip *)pkt_data_at_offset(pkt, offset); | ||
iph->ip_csum = vr_ip_csum(iph); | ||
} | ||
|
||
static void | ||
zero_ip_csum_at_offset(struct vr_packet *pkt, unsigned offset) | ||
{ | ||
struct vr_ip *iph; | ||
|
||
ASSERT(0 < offset); | ||
|
||
iph = (struct vr_ip *)pkt_data_at_offset(pkt, offset); | ||
iph->ip_csum = 0; | ||
} | ||
|
||
static bool fix_csum(struct vr_packet *pkt, unsigned offset) | ||
{ | ||
uint32_t csum; | ||
uint16_t size; | ||
uint8_t type; | ||
|
||
PNET_BUFFER_LIST nbl = pkt->vp_net_buffer_list; | ||
PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl); | ||
|
||
void* packet_data_buffer = ExAllocatePoolWithTag(NonPagedPoolNx, NET_BUFFER_DATA_LENGTH(nb), VrAllocationTag); | ||
|
||
// Copy the packet. This function will not fail if ExAllocatePoolWithTag succeeded | ||
// So no need to clean it up | ||
// If ExAllocatePoolWithTag failed (packet_data_buffer== NULL), | ||
// this function will work okay if the data is contigous. | ||
uint8_t* packet_data = NdisGetDataBuffer(nb, NET_BUFFER_DATA_LENGTH(nb), packet_data_buffer, 1, 0); | ||
|
||
if (packet_data == NULL) | ||
// No need for free | ||
return false; | ||
|
||
if (pkt->vp_type == VP_TYPE_IP6 || pkt->vp_type == VP_TYPE_IP6OIP) { | ||
struct vr_ip6 *hdr = (struct vr_ip6*) (packet_data + offset); | ||
offset += sizeof(struct vr_ip6); | ||
size = ntohs(hdr->PayloadLength); | ||
|
||
type = hdr->NextHeader; | ||
} else { | ||
struct vr_ip *hdr = (struct vr_ip*) &packet_data[offset]; | ||
offset += hdr->ip_hl * 4; | ||
size = ntohs(hdr->ip_len) - 4 * hdr->ip_hl; | ||
|
||
type = hdr->ip_proto; | ||
} | ||
|
||
uint8_t* payload = &packet_data[offset]; | ||
csum = calc_csum((uint8_t*) payload, size); | ||
|
||
// This time it's the "real" packet. Header being contiguous is guaranteed, but nothing else | ||
if (type == VR_IP_PROTO_UDP) { | ||
struct vr_udp* udp = (struct vr_udp*) pkt_data_at_offset(pkt, offset); | ||
udp->udp_csum = htons(~(trim_csum(csum))); | ||
} else if (type == VR_IP_PROTO_TCP) { | ||
struct vr_tcp* tcp = (struct vr_tcp*) pkt_data_at_offset(pkt, offset); | ||
tcp->tcp_csum = htons(~(trim_csum(csum))); | ||
} | ||
|
||
if (packet_data_buffer) | ||
ExFreePool(packet_data_buffer); | ||
|
||
return true; | ||
} | ||
|
||
static void | ||
fix_tunneled_csum(struct vr_packet *pkt) | ||
{ | ||
PNET_BUFFER_LIST nbl = pkt->vp_net_buffer_list; | ||
NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO settings; | ||
settings.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo); | ||
|
||
if (settings.Transmit.IpHeaderChecksum) { | ||
// Zero the outer checksum, it'll be offloaded | ||
zero_ip_csum_at_offset(pkt, sizeof(struct vr_eth)); | ||
// Fix the inner checksum, it will not be offloaded | ||
fix_ip_csum_at_offset(pkt, pkt->vp_inner_network_h); | ||
} else { | ||
// Fix the outer checksum | ||
fix_ip_csum_at_offset(pkt, sizeof(struct vr_eth)); | ||
// Inner checksum is OK | ||
} | ||
|
||
if (settings.Transmit.TcpChecksum) { | ||
// Calculate the header/data csum and turn off HW acceleration | ||
if (fix_csum(pkt, pkt->vp_inner_network_h)) { | ||
settings.Transmit.TcpChecksum = 0; | ||
settings.Transmit.TcpHeaderOffset = 0; | ||
NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo) = settings.Value; | ||
} | ||
// else try to offload it even though it's tunneled. | ||
} | ||
|
||
if (settings.Transmit.UdpChecksum) { | ||
// Calculate the header/data csum and turn off HW acceleration | ||
if (fix_csum(pkt, pkt->vp_inner_network_h)) { | ||
settings.Transmit.UdpChecksum = 0; | ||
NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo) = settings.Value; | ||
} | ||
// else try to offload it even though it's tunneled. | ||
} | ||
} | ||
|
||
static int | ||
__win_if_tx(struct vr_interface *vif, struct vr_packet *pkt) | ||
{ | ||
if (vr_pkt_type_is_overlay(pkt->vp_type)) | ||
fix_tunneled_csum(pkt); | ||
|
||
PNET_BUFFER_LIST nbl = pkt->vp_net_buffer_list; | ||
|
||
NDIS_SWITCH_PORT_DESTINATION newDestination = { 0 }; | ||
|
||
newDestination.PortId = vif->vif_port; | ||
newDestination.NicIndex = vif->vif_nic; | ||
DbgPrint("Adding target, PID: %u, NID: %u\r\n", newDestination.PortId, newDestination.NicIndex); | ||
|
||
VrSwitchObject->NdisSwitchHandlers.AddNetBufferListDestination(VrSwitchObject->NdisSwitchContext, nbl, &newDestination); | ||
|
||
PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwd = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); | ||
fwd->IsPacketDataSafe = TRUE; | ||
|
||
NdisAdvanceNetBufferListDataStart(nbl, pkt->vp_data, TRUE, NULL); | ||
|
||
NdisFSendNetBufferLists(VrSwitchObject->NdisFilterHandle, | ||
nbl, | ||
NDIS_DEFAULT_PORT_NUMBER, | ||
0); | ||
|
||
return 0; | ||
} | ||
|
||
static int | ||
win_if_tx(struct vr_interface *vif, struct vr_packet* pkt) | ||
{ | ||
DbgPrint("%s: Got pkt\n", __func__); | ||
if (vif == NULL) { | ||
FreeNetBufferList(pkt->vp_net_buffer_list); | ||
return 0; // Sent into /dev/null | ||
} | ||
|
||
if (vif->vif_type == VIF_TYPE_AGENT) | ||
return pkt0_if_tx(vif, pkt); | ||
else | ||
return __win_if_tx(vif, pkt); | ||
} | ||
|
||
static int | ||
win_if_rx(struct vr_interface *vif, struct vr_packet* pkt) | ||
{ | ||
DbgPrint("%s: Got pkt\n", __func__); | ||
|
||
// Since we are operating from virtual switch's PoV and not from OS's PoV, RXing is the same as TXing | ||
// On Linux, we receive the packet as an OS, but in Windows we are a switch to we simply push the packet to OS's networking stack | ||
// See vhost_tx for reference (it calls hif_ops->hif_rx) | ||
|
||
win_if_tx(vif, pkt); | ||
|
||
return 0; | ||
} | ||
|
||
static int | ||
win_if_get_settings(struct vr_interface *vif, struct vr_interface_settings *settings) | ||
{ | ||
UNREFERENCED_PARAMETER(vif); | ||
UNREFERENCED_PARAMETER(settings); | ||
|
||
/* TODO: Implement */ | ||
DbgPrint("%s(): dummy implementation called\n", __func__); | ||
|
||
return -EINVAL; | ||
} | ||
|
||
static unsigned int | ||
win_if_get_mtu(struct vr_interface *vif) | ||
{ | ||
UNREFERENCED_PARAMETER(vif); | ||
|
||
/* TODO: Implement */ | ||
DbgPrint("%s(): dummy implementation called\n", __func__); | ||
|
||
return vif->vif_mtu; | ||
} | ||
|
||
static unsigned short | ||
win_if_get_encap(struct vr_interface *vif) | ||
{ | ||
UNREFERENCED_PARAMETER(vif); | ||
|
||
/* TODO: Implement */ | ||
DbgPrint("%s(): dummy implementation called\n", __func__); | ||
|
||
return VIF_ENCAP_TYPE_ETHER; | ||
} | ||
|
||
static struct vr_host_interface_ops win_host_interface_ops = { | ||
.hif_lock = win_if_lock, | ||
.hif_unlock = win_if_unlock, | ||
.hif_add = win_if_add, | ||
.hif_del = win_if_del, | ||
.hif_add_tap = win_if_add_tap, | ||
.hif_del_tap = win_if_del_tap, | ||
.hif_tx = win_if_tx, | ||
.hif_rx = win_if_rx, | ||
.hif_get_settings = win_if_get_settings, | ||
.hif_get_mtu = win_if_get_mtu, | ||
.hif_get_encap = win_if_get_encap, | ||
.hif_stats_update = NULL, | ||
}; | ||
|
||
void | ||
vr_host_vif_init(struct vrouter *router) | ||
{ | ||
UNREFERENCED_PARAMETER(router); | ||
} | ||
|
||
void | ||
vr_host_interface_exit(void) | ||
{ | ||
/* Noop */ | ||
} | ||
|
||
void | ||
vhost_xconnect(void) | ||
{ | ||
struct vrouter *vrouter = vrouter_get(0); | ||
struct vr_interface *host_if; | ||
|
||
if (vrouter->vr_host_if != NULL) { | ||
host_if = vrouter->vr_host_if; | ||
vif_set_xconnect(host_if); | ||
|
||
if (host_if->vif_bridge != NULL) | ||
vif_set_xconnect(host_if->vif_bridge); | ||
} | ||
} | ||
|
||
void | ||
vhost_remove_xconnect(void) | ||
{ | ||
struct vrouter *vrouter = vrouter_get(0); | ||
struct vr_interface *host_if; | ||
|
||
if (vrouter->vr_host_if != NULL) { | ||
host_if = vrouter->vr_host_if; | ||
vif_remove_xconnect(host_if); | ||
|
||
if (host_if->vif_bridge != NULL) | ||
vif_remove_xconnect(host_if->vif_bridge); | ||
} | ||
} | ||
|
||
struct vr_host_interface_ops * | ||
vr_host_interface_init(void) | ||
{ | ||
NDIS_INIT_MUTEX(&win_if_mutex); | ||
|
||
return &win_host_interface_ops; | ||
} |
Oops, something went wrong.